The cputime scaling mechanism can sometimes cause top to report more than
100% (occasionally more than 1000%) CPU usage for a single thread. This
patch makes sure that the reported stime + utime always equals the actual
runtime (rtime) of the thread.

Signed-off-by: Fredrik Markstrom <fredrik.markst...@gmail.com>
---
 kernel/sched/cputime.c | 46 +++++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f5a64ff..2d168c8 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -554,22 +554,7 @@ drop_precision:
        return (__force cputime_t) scaled;
 }
 
-/*
- * Atomically advance counter to the new value. Interrupts, vcpu
- * scheduling, and scaling inaccuracies can cause cputime_advance
- * to be occasionally called with a new value smaller than counter.
- * Let's enforce atomicity.
- *
- * Normally a caller will only go through this loop once, or not
- * at all in case a previous caller updated counter the same jiffy.
- */
-static void cputime_advance(cputime_t *counter, cputime_t new)
-{
-       cputime_t old;
-
-       while (new > (old = READ_ONCE(*counter)))
-               cmpxchg_cputime(counter, old, new);
-}
+static DEFINE_SPINLOCK(prev_time_lock);
 
 /*
  * Adjust tick based cputime random precision against scheduler
@@ -590,17 +575,11 @@ static void cputime_adjust(struct task_cputime *curr,
         *
         * Fix this by scaling these tick based values against the total
         * runtime accounted by the CFS scheduler.
+        * In addition make sure the reported stime+utime equals rtime
+        * so that the total runtime reported is correct.
         */
        rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
-       /*
-        * Update userspace visible utime/stime values only if actual execution
-        * time is bigger than already exported. Note that can happen, that we
-        * provided bigger values due to scaling inaccuracy on big numbers.
-        */
-       if (prev->stime + prev->utime >= rtime)
-               goto out;
-
        stime = curr->stime;
        utime = curr->utime;
 
@@ -616,12 +595,25 @@ static void cputime_adjust(struct task_cputime *curr,
                utime = rtime - stime;
        }
 
-       cputime_advance(&prev->stime, stime);
-       cputime_advance(&prev->utime, utime);
+       spin_lock(&prev_time_lock);
+       if (stime < prev->stime) {
+               stime = prev->stime;
+               utime = rtime - stime;
+       } else if (utime < prev->utime) {
+               utime = prev->utime;
+               stime = rtime - utime;
+       }
+       WARN_ON(stime < prev->stime);
+       WARN_ON(utime < prev->utime);
+       WARN_ON(stime + utime != rtime);
 
-out:
+       if (prev->stime + prev->utime < rtime) {
+               prev->stime = stime;
+               prev->utime = utime;
+       }
        *ut = prev->utime;
        *st = prev->stime;
+       spin_unlock(&prev_time_lock);
 }
 
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to