From: Vladimir Davydov <vdavy...@parallels.com>

This is a backport of diff-sched-account-ctxsw-per-task-group:

 Subject: sched: account ctxsw per task group
 Date: Fri, 28 Dec 2012 15:09:45 +0400

* [sched] the number of context switches should be reported correctly
        inside a CT in /proc/stat (PSBM-18113)

For /proc/stat:ctxt to be correct inside containers.

https://jira.sw.ru/browse/PSBM-18113

Signed-off-by: Vladimir Davydov <vdavy...@parallels.com>

(cherry picked from vz7 commit d388f0bf64adb74cd62c4deff58e181bd63d62ac)
Signed-off-by: Konstantin Khorenko <khore...@virtuozzo.com>
Reviewed-by: Andrey Ryabinin <aryabi...@virtuozzo.com>

+++
sched: Port diff-sched-clear-prev-entity-if-curr-is-dequeued

Author: Vladimir Davydov
Email: vdavy...@parallels.com
Subject: sched: clear prev entity if curr is dequeued
Date: Fri, 20 Sep 2013 16:55:23 +0400

cfs_rq->prev is used for ctxsw accounting: on put_prev_entity()
cfs_rq->prev is set to curr if curr is on rq, and on set_next_entity()
nr_switches is increased if curr != prev. However, if the current task
goes to sleep, cfs_rq->prev won't be updated, which will lead to a ctxsw
not being accounted. The patch fixes this by clearing cfs_rq->prev if
current is not on rq.

Signed-off-by: Vladimir Davydov <vdavy...@parallels.com>

=============================================================================

Related to https://jira.sw.ru/browse/PSBM-33642

Signed-off-by: Vladimir Davydov <vdavy...@parallels.com>

https://jira.sw.ru/browse/PSBM-127780
(cherry picked from vz7 commit 409c82642d4f ("sched: Port
diff-sched-clear-prev-entity-if-curr-is-dequeued"))
Fixes: dc03756f0c397 ("sched/stat: account ctxsw per task group")

Signed-off-by: Pavel Tikhomirov <ptikhomi...@virtuozzo.com>

(cherry picked from vz8 commit 5345b3e5cd7b ("sched/stat: account
ctxsw per task group"))

Signed-off-by: Nikita Yushchenko <nikita.yushche...@virtuozzo.com>
---
 kernel/sched/cpuacct.c |  4 +++-
 kernel/sched/fair.c    | 17 ++++++++++++++---
 kernel/sched/sched.h   |  3 +++
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index 871b6f8ccb0d..a409c441aba3 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -649,6 +649,7 @@ int cpu_cgroup_proc_stat(struct cgroup_subsys_state 
*cpu_css,
        struct kernel_cpustat *kcpustat;
        unsigned long tg_nr_running = 0;
        unsigned long tg_nr_iowait = 0;
+       unsigned long long tg_nr_switches = 0;
 
        time_ns = ve_get_time_ns(get_exec_env());
        if (time_ns) {
@@ -672,6 +673,7 @@ int cpu_cgroup_proc_stat(struct cgroup_subsys_state 
*cpu_css,
 #ifdef CONFIG_FAIR_GROUP_SCHED
                tg_nr_running += tg->cfs_rq[i]->h_nr_running;
                tg_nr_iowait  += tg->cfs_rq[i]->nr_iowait;
+               tg_nr_switches += tg->cfs_rq[i]->nr_switches;
 #endif
 #ifdef CONFIG_RT_GROUP_SCHED
                tg_nr_running += tg->rt_rq[i]->rt_nr_running;
@@ -745,7 +747,7 @@ int cpu_cgroup_proc_stat(struct cgroup_subsys_state 
*cpu_css,
                   "processes %lu\n"
                   "procs_running %lu\n"
                   "procs_blocked %lu\n",
-                  nr_context_switches(),
+                  tg_nr_switches,
                   (unsigned long long)boottime.tv_sec,
                   total_forks,
                   tg_nr_running,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c0bf49db868d..b8de26a5629d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4488,6 +4488,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity 
*se, int flags)
 
        clear_buddies(cfs_rq, se);
 
+       if (cfs_rq->prev == se)
+               cfs_rq->prev = NULL;
+
        if (se != cfs_rq->curr)
                __dequeue_entity(cfs_rq, se);
        se->on_rq = 0;
@@ -4502,8 +4505,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct 
sched_entity *se, int flags)
        if (!(flags & DEQUEUE_SLEEP))
                se->vruntime -= cfs_rq->min_vruntime;
 
-       /* return excess runtime on last dequeue */
-       return_cfs_rq_runtime(cfs_rq);
+       if (!cfs_rq->nr_running) {
+               /* return excess runtime on last dequeue */
+               return_cfs_rq_runtime(cfs_rq);
+               /* account switch to idle task */
+               cfs_rq->nr_switches++;
+       }
 
        update_cfs_group(se);
 
@@ -4579,6 +4586,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct 
sched_entity *se)
 
        update_stats_curr_start(cfs_rq, se);
        cfs_rq->curr = se;
+       if (cfs_rq->prev != se)
+               cfs_rq->nr_switches++;
 
        /*
         * Track our maximum slice length, if the CPU's load is at
@@ -4676,7 +4685,9 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct 
sched_entity *prev)
                __enqueue_entity(cfs_rq, prev);
                /* in !on_rq case, update occurred at dequeue */
                update_load_avg(cfs_rq, prev, 0);
-       }
+               cfs_rq->prev = prev;
+       } else
+               cfs_rq->prev = NULL;
        cfs_rq->curr = NULL;
 }
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 13c1941d0593..5042f45bf887 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -579,6 +579,9 @@ struct cfs_rq {
        struct sched_entity     *next;
        struct sched_entity     *last;
        struct sched_entity     *skip;
+       struct sched_entity     *prev;
+
+       u64 nr_switches;
 
 #ifdef CONFIG_SCHED_DEBUG
        unsigned int            nr_spread_over;
-- 
2.30.2

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to