Currently, the decayed values of previous periods can spill into the lower 10 bits of runnable_avg_period. This makes the next period shorter than 1024 us. If we want to decay exactly every 1024 us, and I see no reason not to (less math overhead and a consistent decay period among all tasks), we can use a separate field to track how much of the current period has elapsed, instead of overloading runnable_avg_period. This patch adds such a field.
Signed-off-by: Zhihui Zhang <zzhs...@gmail.com> --- include/linux/sched.h | 2 +- kernel/sched/fair.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8db31ef..fa6b23b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1116,7 +1116,7 @@ struct sched_avg { * above by 1024/(1-y). Thus we only need a u32 to store them for all * choices of y < 1-2^(-32)*1024. */ - u32 runnable_avg_sum, runnable_avg_period; + u32 accrue, runnable_avg_sum, runnable_avg_period; u64 last_runnable_update; s64 decay_count; unsigned long load_avg_contrib; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index df2cdf7..c87ecf5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -676,6 +676,7 @@ void init_task_runnable_average(struct task_struct *p) { u32 slice; + p->se.avg.accrue = 0; p->se.avg.decay_count = 0; slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; p->se.avg.runnable_avg_sum = slice; @@ -2526,11 +2527,12 @@ static __always_inline int __update_entity_runnable_avg(u64 now, sa->last_runnable_update = now; /* delta_w is the amount already accumulated against our next period */ - delta_w = sa->runnable_avg_period % 1024; + delta_w = sa->accrue; if (delta + delta_w >= 1024) { /* period roll-over */ decayed = 1; + sa->accrue = 0; /* * Now that we know we're crossing a period boundary, figure * out how much from delta we need to complete the current @@ -2558,6 +2560,7 @@ static __always_inline int __update_entity_runnable_avg(u64 now, sa->runnable_avg_sum += runnable_contrib; sa->runnable_avg_period += runnable_contrib; } + sa->accrue += delta; /* Remainder of delta accrued against u_0` */ if (runnable) -- 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/