Aaaaand it didn't take long for me to find a small bug... attached is a fixed version of the patch. Such things happen if one decides to regenerate a patch "just in case" and forgets to revert to a working version before doing that :D
--
Gregor Best

Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.149
diff -u -r1.149 proc.h
--- sys/proc.h	7 Jan 2012 05:38:12 -0000	1.149
+++ sys/proc.h	17 Jan 2012 16:10:09 -0000
@@ -43,6 +43,7 @@
 #include <machine/proc.h>	/* Machine-dependent proc substruct. */
 #include <sys/selinfo.h>	/* For struct selinfo */
 #include <sys/queue.h>
+#include <sys/tree.h>
 #include <sys/timeout.h>	/* For struct timeout */
 #include <sys/event.h>		/* For struct klist */
 #include <sys/mutex.h>		/* For struct mutex */
@@ -210,7 +211,9 @@
 #define	PS_SINGLEUNWIND	_P_SINGLEUNWIND
 
 struct proc {
-	TAILQ_ENTRY(proc) p_runq;
+	RB_ENTRY(proc) p_runq;
+	RB_ENTRY(proc) p_expq;
+	TAILQ_ENTRY(proc) p_slpq;
 	LIST_ENTRY(proc) p_list;	/* List of all processes. */
 
 	struct process *p_p;		/* The process of this thread. */
@@ -251,6 +254,9 @@
 #endif
 
 	/* scheduling */
+	struct timeval p_deadline;	/* virtual deadline used for scheduling */
+	struct timeval p_deadline_set;	/* time at which the deadline was set */
+	int	p_rrticks;		/* number of ticks this process is allowed to stay on a processor */
 	u_int	p_estcpu;		/* Time averaged value of p_cpticks. */
 	int	p_cpticks;	/* Ticks of cpu time. */
 	fixpt_t	p_pctcpu;	/* %cpu for this process during p_swtime */
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.30
diff -u -r1.30 sched.h
--- sys/sched.h	16 Nov 2011 20:50:19 -0000	1.30
+++ sys/sched.h	17 Jan 2012 16:10:09 -0000
@@ -87,8 +87,6 @@
 #define	CP_IDLE		4
 #define	CPUSTATES	5
 
-#define	SCHED_NQS	32			/* 32 run queues. */
-
 /*
  * Per-CPU scheduler state.
  * XXX - expose to userland for now.
@@ -99,7 +97,6 @@
 	u_int spc_schedticks;		/* ticks for schedclock() */
 	u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
 	u_char spc_curpriority;		/* usrpri of curproc */
-	int spc_rrticks;		/* ticks until roundrobin() */
 	int spc_pscnt;			/* prof/stat counter */
 	int spc_psdiv;			/* prof/stat divisor */
 	struct proc *spc_idleproc;	/* idle proc for this cpu */
@@ -107,9 +104,6 @@
 	u_int spc_nrun;			/* procs on the run queues */
 	fixpt_t spc_ldavg;		/* shortest load avg. for this cpu */
 
-	TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
-	volatile uint32_t spc_whichqs;
-
 #ifdef notyet
 	struct proc *spc_reaper;	/* dead proc reaper */
 #endif
@@ -119,18 +113,16 @@
 #ifdef	_KERNEL
 /* spc_flags */
-#define SPCF_SEENRR		0x0001	/* process has seen roundrobin() */
-#define SPCF_SHOULDYIELD	0x0002	/* process should yield the CPU */
-#define SPCF_SWITCHCLEAR	(SPCF_SEENRR|SPCF_SHOULDYIELD)
-#define SPCF_SHOULDHALT		0x0004	/* CPU should be vacated */
-#define SPCF_HALTED		0x0008	/* CPU has been halted */
+#define SPCF_SHOULDYIELD	0x0001	/* process should yield the CPU */
+#define SPCF_SHOULDHALT		0x0002	/* CPU should be vacated */
+#define SPCF_HALTED		0x0004	/* CPU has been halted */
 
-#define	SCHED_PPQ	(128 / SCHED_NQS)	/* priorities per queue */
 #define	NICE_WEIGHT 2			/* priorities per nice level */
-#define	ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+#define	ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX)
 
 extern int schedhz;			/* ideally: 16 */
 extern int rrticks_init;		/* ticks per roundrobin() */
+extern struct cpuset sched_idle_cpus;
 
 struct proc;
 void schedclock(struct proc *);
@@ -147,18 +139,20 @@
 void cpu_switchto(struct proc *, struct proc *);
 struct proc *sched_chooseproc(void);
 struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
+struct cpu_info *sched_choosecpu_fork(struct proc *parent);
 void cpu_idle_enter(void);
 void cpu_idle_cycle(void);
 void cpu_idle_leave(void);
 void sched_peg_curproc(struct cpu_info *ci);
 
+void generate_deadline(struct proc *, char);
+
 #ifdef MULTIPROCESSOR
 void sched_start_secondary_cpus(void);
 void sched_stop_secondary_cpus(void);
 #endif
 
-#define curcpu_is_idle()	(curcpu()->ci_schedstate.spc_whichqs == 0)
+#define curcpu_is_idle()	(cpuset_isset(&sched_idle_cpus, curcpu()))
 
 void sched_init_runqueues(void);
 void setrunqueue(struct proc *);
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.72
diff -u -r1.72 kern_clock.c
--- kern/kern_clock.c	7 Mar 2011 07:07:13 -0000	1.72
+++ kern/kern_clock.c	17 Jan 2012 16:10:10 -0000
@@ -241,7 +241,11 @@
 	if (stathz == 0)
 		statclock(frame);
 
-	if (--ci->ci_schedstate.spc_rrticks <= 0)
+	/*
+	 * If the currently running process went over its round robin tick quota,
+	 * preempt it.
+	 */
+	if (p && (--(p->p_rrticks) <= 0))
 		roundrobin(ci);
 
 	/*
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.133
diff -u -r1.133 kern_fork.c
--- kern/kern_fork.c	14 Dec 2011 07:32:16 -0000	1.133
+++ kern/kern_fork.c	17 Jan 2012 16:10:10 -0000
@@ -225,6 +225,9 @@
 		atomic_setbits_int(&pr->ps_flags, PS_CONTROLT);
 
 	p->p_p = pr;
+
+	/* Pretend we preempted this new process */
+	generate_deadline(p, 0);
 }
 
 /* print the 'table full' message once per 10 seconds */
@@ -485,7 +488,7 @@
 	getmicrotime(&p->p_stats->p_start);
 	p->p_acflag = AFORK;
 	p->p_stat = SRUN;
-	p->p_cpu = sched_choosecpu_fork(curp, flags);
+	p->p_cpu = sched_choosecpu_fork(curp);
 	setrunqueue(p);
 	SCHED_UNLOCK(s);
 
Index: kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.47
diff -u -r1.47 kern_proc.c
--- kern/kern_proc.c	18 Sep 2011 23:20:54 -0000	1.47
+++ kern/kern_proc.c	17 Jan 2012 16:10:10 -0000
@@ -398,8 +398,6 @@
 	    p->p_comm, p->p_pid, pst, p->p_flag, P_BITS);
 	(*pr)("    pri=%u, usrpri=%u, nice=%d\n",
 	    p->p_priority, p->p_usrpri, p->p_p->ps_nice);
-	(*pr)("    forw=%p, list=%p,%p\n",
-	    TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev);
 	(*pr)("    process=%p user=%p, vmspace=%p\n",
 	    p->p_p, p->p_addr, p->p_vmspace);
 	(*pr)("    estcpu=%u, cpticks=%d, pctcpu=%u.%u, swtime=%u\n",
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.24
diff -u -r1.24 kern_sched.c
--- kern/kern_sched.c	12 Oct 2011 18:30:09 -0000	1.24
+++ kern/kern_sched.c	17 Jan 2012 16:10:10 -0000
@@ -19,6 +19,7 @@
 
 #include <sys/sched.h>
 #include <sys/proc.h>
+#include <sys/tree.h>
 #include <sys/kthread.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
@@ -32,9 +33,11 @@
 
 void sched_kthreads_create(void *);
 
-int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
 struct proc *sched_steal_proc(struct cpu_info *);
 
+RB_HEAD(runqueue, proc) sched_runqueue;
+RB_HEAD(expqueue, proc) sched_expqueue;
+
 /*
  * To help choosing which cpu should run which process we keep track
  * of cpus which are currently idle and which cpus have processes
@@ -61,6 +64,27 @@
  * interrupts during the context switch.
  */
 
+static int
+sched_cmp_proc(struct proc *a, struct proc *b) {
+	if (a == b)
+		return 0;
+	if (timercmp(&(a->p_deadline), &(b->p_deadline), <))
+		return -1;
+	return 1;
+}
+
+static int
+sched_cmp_proc_exp(struct proc *a, struct proc *b) {
+	if (a == b)
+		return 0;
+	if (timercmp(&(a->p_deadline_set), &(b->p_deadline_set), <))
+		return -1;
+	return 1;
+}
+
+RB_GENERATE_STATIC(runqueue, proc, p_runq, sched_cmp_proc);
+RB_GENERATE_STATIC(expqueue, proc, p_expq, sched_cmp_proc_exp);
+
 /*
  * sched_init_cpu is called from main() for the boot cpu, then it's the
  * responsibility of the MD code to call it for all other cpus.
@@ -69,10 +93,6 @@
 sched_init_cpu(struct cpu_info *ci)
 {
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
-	int i;
-
-	for (i = 0; i < SCHED_NQS; i++)
-		TAILQ_INIT(&spc->spc_qs[i]);
 
 	spc->spc_idleproc = NULL;
 
@@ -106,6 +126,7 @@
 {
 	struct schedstate_percpu *spc;
 	struct proc *p = curproc;
+	struct proc *dead;
 	struct cpu_info *ci = v;
 	int s;
 
@@ -120,7 +141,6 @@
 	SCHED_LOCK(s);
 	cpuset_add(&sched_idle_cpus, ci);
 	p->p_stat = SSLEEP;
-	p->p_cpu = ci;
 	atomic_setbits_int(&p->p_flag, P_CPUPEG);
 	mi_switch();
 	cpuset_del(&sched_idle_cpus, ci);
@@ -130,38 +150,27 @@
 	KASSERT(curproc == spc->spc_idleproc);
 
 	while (1) {
-		while (!curcpu_is_idle()) {
-			struct proc *dead;
-
-			SCHED_LOCK(s);
-			p->p_stat = SSLEEP;
-			mi_switch();
-			SCHED_UNLOCK(s);
-
-			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
+		while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
 				LIST_REMOVE(dead, p_hash);
 				exit2(dead);
-			}
 		}
+		if ((spc->spc_schedflags & SPCF_SHOULDHALT) && !(spc->spc_schedflags & SPCF_HALTED))
+			atomic_setbits_int(&spc->spc_schedflags, SPCF_HALTED);
 
 		splassert(IPL_NONE);
 
 		cpuset_add(&sched_idle_cpus, ci);
+
 		cpu_idle_enter();
-		while (spc->spc_whichqs == 0) {
-			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
-			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
-				cpuset_del(&sched_idle_cpus, ci);
-				SCHED_LOCK(s);
-				atomic_setbits_int(&spc->spc_schedflags,
-				    spc->spc_whichqs ? 0 : SPCF_HALTED);
-				SCHED_UNLOCK(s);
-				wakeup(spc);
-			}
-			cpu_idle_cycle();
-		}
+		cpu_idle_cycle();
 		cpu_idle_leave();
+		cpuset_del(&sched_idle_cpus, ci);
+
+		SCHED_LOCK(s);
+		p->p_stat = SSLEEP;
+		mi_switch();
+		SCHED_UNLOCK(s);
 	}
 }
 
@@ -206,63 +215,35 @@
 void
 sched_init_runqueues(void)
 {
+	RB_INIT(&sched_runqueue);
+	RB_INIT(&sched_expqueue);
 }
 
 void
 setrunqueue(struct proc *p)
 {
-	struct schedstate_percpu *spc;
-	int queue = p->p_priority >> 2;
-
 	SCHED_ASSERT_LOCKED();
-	spc = &p->p_cpu->ci_schedstate;
-	spc->spc_nrun++;
-
-	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
-	spc->spc_whichqs |= (1 << queue);
-	cpuset_add(&sched_queued_cpus, p->p_cpu);
-
-	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
-		cpu_unidle(p->p_cpu);
+	RB_INSERT(runqueue, &sched_runqueue, p);
 }
 
 void
 remrunqueue(struct proc *p)
 {
-	struct schedstate_percpu *spc;
-	int queue = p->p_priority >> 2;
-
 	SCHED_ASSERT_LOCKED();
-	spc = &p->p_cpu->ci_schedstate;
-	spc->spc_nrun--;
-
-	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
-	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
-		spc->spc_whichqs &= ~(1 << queue);
-		if (spc->spc_whichqs == 0)
-			cpuset_del(&sched_queued_cpus, p->p_cpu);
-	}
+	if (!(RB_REMOVE(runqueue, &sched_runqueue, p)))
+		RB_REMOVE(expqueue, &sched_expqueue, p);
 }
 
 struct proc *
 sched_chooseproc(void)
 {
 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-	struct proc *p;
-	int queue;
+	struct proc *p = NULL;
+	struct timeval now;
 
 	SCHED_ASSERT_LOCKED();
 
 	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
-		if (spc->spc_whichqs) {
-			for (queue = 0; queue < SCHED_NQS; queue++) {
-				TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
-					remrunqueue(p);
-					p->p_cpu = sched_choosecpu(p);
-					setrunqueue(p);
-				}
-			}
-		}
 		p = spc->spc_idleproc;
 		KASSERT(p);
 		KASSERT(p->p_wchan == NULL);
@@ -270,16 +251,30 @@
 		return (p);
 	}
 
-again:
-	if (spc->spc_whichqs) {
-		queue = ffs(spc->spc_whichqs) - 1;
-		p = TAILQ_FIRST(&spc->spc_qs[queue]);
-		remrunqueue(p);
-		KASSERT(p->p_stat == SRUN);
-	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
-		p = spc->spc_idleproc;
-		if (p == NULL) {
-			int s;
+	if ((!RB_EMPTY(&sched_runqueue)) || (!RB_EMPTY(&sched_expqueue))) {
+		/*
+		 * Move expired processes to the expired runqueue where they
+		 * are sorted by the time they got their deadline set instead
+		 * of the deadline itself.
+		 */
+		microuptime(&now);
+		while (!RB_EMPTY(&sched_runqueue)) {
+			p = RB_MIN(runqueue, &sched_runqueue);
+			if (!p) break;
+			if (!timercmp(&(p->p_deadline), &now, <)) break;
+			RB_INSERT(expqueue, &sched_expqueue, p);
+			RB_REMOVE(runqueue, &sched_runqueue, p);
+		}
+
+		if (!RB_EMPTY(&sched_expqueue)) {
+			p = RB_MIN(expqueue, &sched_expqueue);
+			RB_REMOVE(expqueue, &sched_expqueue, p);
+		} else {
+			p = RB_MIN(runqueue, &sched_runqueue);
+			remrunqueue(p);
+		}
+	} else {
+		while ((p = spc->spc_idleproc) == NULL) {
+			int s;
 			/*
 			 * We get here if someone decides to switch during
 			 * boot before forking kthreads, bleh.
@@ -291,13 +286,15 @@
 			spl0();
 			delay(10);
 			SCHED_LOCK(s);
-			goto again;
-		}
+		}
 		KASSERT(p);
+	}
+
+	if (p) {
 		p->p_stat = SRUN;
+		p->p_cpu = curcpu();
 	}
 
-	KASSERT(p->p_wchan == NULL);
 	return (p);
 }
 
@@ -310,30 +307,13 @@
 uint64_t sched_nomigrations;
 
 struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
+sched_choosecpu_fork(struct proc *parent)
 {
 	struct cpu_info *choice = NULL;
 	fixpt_t load, best_load = ~0;
-	int run, best_run = INT_MAX;
 	struct cpu_info *ci;
 	struct cpuset set;
 
-#if 0
-	/*
-	 * XXX
-	 * Don't do this until we have a painless way to move the cpu in exec.
-	 * Preferably when nuking the old pmap and getting a new one on a
-	 * new cpu.
-	 */
-	/*
-	 * PPWAIT forks are simple. We know that the parent will not
-	 * run until we exec and choose another cpu, so we just steal its
-	 * cpu.
-	 */
-	if (flags & FORK_PPWAIT)
-		return (parent->p_cpu);
-#endif
-
 	/*
 	 * Look at all cpus that are currently idle and have nothing queued.
 	 * If there are none, pick the one with least queued procs first,
@@ -347,13 +327,10 @@
 		cpuset_del(&set, ci);
 
 		load = ci->ci_schedstate.spc_ldavg;
-		run = ci->ci_schedstate.spc_nrun;
-
-		if (choice == NULL || run < best_run ||
-		    (run == best_run && load < best_load)) {
+		if (choice == NULL || load < best_load) {
 			choice = ci;
 			best_load = load;
-			best_run = run;
 		}
 	}
 
@@ -364,9 +341,6 @@
 sched_choosecpu(struct proc *p)
 {
 	struct cpu_info *choice = NULL;
-	int last_cost = INT_MAX;
-	struct cpu_info *ci;
-	struct cpuset set;
 
 	/*
 	 * If pegged to a cpu, don't allow it to move.
@@ -374,169 +348,19 @@
 	if (p->p_flag & P_CPUPEG)
 		return (p->p_cpu);
 
-	sched_choose++;
-
-	/*
-	 * Look at all cpus that are currently idle and have nothing queued.
-	 * If there are none, pick the cheapest of those.
-	 * (idle + queued could mean that the cpu is handling an interrupt
-	 * at this moment and haven't had time to leave idle yet).
-	 */
-	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-
 	/*
-	 * First, just check if our current cpu is in that set, if it is,
-	 * this is simple.
-	 * Also, our cpu might not be idle, but if it's the current cpu
-	 * and it has nothing else queued and we're curproc, take it.
+	 * Else, just pretend we forked a new process.
 	 */
-	if (cpuset_isset(&set, p->p_cpu) ||
-	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
-	    curproc == p)) {
-		sched_wasidle++;
-		return (p->p_cpu);
-	}
-
-	if (cpuset_first(&set) == NULL)
-		cpuset_copy(&set, &sched_all_cpus);
-
-	while ((ci = cpuset_first(&set)) != NULL) {
-		int cost = sched_proc_to_cpu_cost(ci, p);
+	sched_choose++;
 
-		if (choice == NULL || cost < last_cost) {
-			choice = ci;
-			last_cost = cost;
-		}
-		cpuset_del(&set, ci);
-	}
+	choice = sched_choosecpu_fork(p);
 
 	if (p->p_cpu != choice)
 		sched_nmigrations++;
 	else
 		sched_nomigrations++;
 
-	return (choice);
-}
-
-/*
- * Attempt to steal a proc from some cpu.
- */
-struct proc *
-sched_steal_proc(struct cpu_info *self)
-{
-	struct schedstate_percpu *spc;
-	struct proc *best = NULL;
-	int bestcost = INT_MAX;
-	struct cpu_info *ci;
-	struct cpuset set;
-
-	cpuset_copy(&set, &sched_queued_cpus);
-
-	while ((ci = cpuset_first(&set)) != NULL) {
-		struct proc *p;
-		int queue;
-		int cost;
-
-		cpuset_del(&set, ci);
-
-		spc = &ci->ci_schedstate;
-
-		queue = ffs(spc->spc_whichqs) - 1;
-		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
-			if (p->p_flag & P_CPUPEG)
-				continue;
-
-			cost = sched_proc_to_cpu_cost(self, p);
-
-			if (best == NULL || cost < bestcost) {
-				best = p;
-				bestcost = cost;
-			}
-		}
-	}
-	if (best == NULL)
-		return (NULL);
-
-	spc = &best->p_cpu->ci_schedstate;
-	remrunqueue(best);
-	best->p_cpu = self;
-
-	sched_stolen++;
-
-	return (best);
-}
-
-/*
- * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
- */
-static int
-log2(unsigned int i)
-{
-	int ret = 0;
-
-	while (i >>= 1)
-		ret++;
-
-	return (ret);
-}
-
-/*
- * Calculate the cost of moving the proc to this cpu.
- *
- * What we want is some guesstimate of how much "performance" it will
- * cost us to move the proc here. Not just for caches and TLBs and NUMA
- * memory, but also for the proc itself. A highly loaded cpu might not
- * be the best candidate for this proc since it won't get run.
- *
- * Just total guesstimates for now.
- */
-
-int sched_cost_load = 1;
-int sched_cost_priority = 1;
-int sched_cost_runnable = 3;
-int sched_cost_resident = 1;
-
-int
-sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
-{
-	struct schedstate_percpu *spc;
-	int l2resident = 0;
-	int cost;
-
-	spc = &ci->ci_schedstate;
-
-	cost = 0;
-
-	/*
-	 * First, account for the priority of the proc we want to move.
-	 * More willing to move, the lower the priority of the destination
-	 * and the higher the priority of the proc.
-	 */
-	if (!cpuset_isset(&sched_idle_cpus, ci)) {
-		cost += (p->p_priority - spc->spc_curpriority) *
-		    sched_cost_priority;
-		cost += sched_cost_runnable;
-	}
-	if (cpuset_isset(&sched_queued_cpus, ci)) {
-		cost += spc->spc_nrun * sched_cost_runnable;
-	}
-
-	/*
-	 * Higher load on the destination means we don't want to go there.
-	 */
-	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
-
-	/*
-	 * If the proc is on this cpu already, lower the cost by how much
-	 * it has been running and an estimate of its footprint.
-	 */
-	if (p->p_cpu == ci && p->p_slptime == 0) {
-		l2resident =
-		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
-		cost -= l2resident * sched_cost_resident;
-	}
-
-	return (cost);
+	return choice;
 }
 
 /*
@@ -560,7 +384,6 @@
 }
 
 #ifdef MULTIPROCESSOR
-
 void
 sched_start_secondary_cpus(void)
 {
@@ -583,6 +406,9 @@
 {
 	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
+	int s;
+
+	SCHED_LOCK(s);
 
 	/*
 	 * Make sure we stop the secondary CPUs.
@@ -601,14 +427,17 @@
 		if (CPU_IS_PRIMARY(ci))
 			continue;
-		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
+
+		SCHED_UNLOCK(s);
+		while (!(spc->spc_schedflags & SPCF_HALTED)) {
 			sleep_setup(&sls, spc, PZERO, "schedstate");
-			sleep_finish(&sls,
-			    (spc->spc_schedflags & SPCF_HALTED) == 0);
+			sleep_setup_timeout(&sls, 10);
+			sleep_finish(&sls, 1);
 		}
+		SCHED_LOCK(s);
 	}
+
+	SCHED_UNLOCK(s);
 }
-
 #endif
 
 /*
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.99
diff -u -r1.99 kern_synch.c
--- kern/kern_synch.c	17 Jan 2012 02:34:18 -0000	1.99
+++ kern/kern_synch.c	17 Jan 2012 16:10:10 -0000
@@ -205,7 +205,7 @@
 	p->p_wmesg = wmesg;
 	p->p_slptime = 0;
 	p->p_priority = prio & PRIMASK;
-	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
+	TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq);
 }
 
 void
@@ -342,7 +342,7 @@
 unsleep(struct proc *p)
 {
 	if (p->p_wchan) {
-		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
+		TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_slpq);
 		p->p_wchan = NULL;
 	}
 }
@@ -361,7 +361,7 @@
 	SCHED_LOCK(s);
 	qp = &slpque[LOOKUP(ident)];
 	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
-		pnext = TAILQ_NEXT(p, p_runq);
+		pnext = TAILQ_NEXT(p, p_slpq);
 #ifdef DIAGNOSTIC
 		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
 			panic("wakeup: p_stat is %d", (int)p->p_stat);
@@ -369,7 +369,7 @@
 		if (p->p_wchan == ident) {
 			--n;
 			p->p_wchan = 0;
-			TAILQ_REMOVE(qp, p, p_runq);
+			TAILQ_REMOVE(qp, p, p_slpq);
 			if (p->p_stat == SSLEEP) {
 				/* OPTIMIZED EXPANSION OF setrunnable(p); */
 				if (p->p_slptime > 1)
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.27
diff -u -r1.27 sched_bsd.c
--- kern/sched_bsd.c	7 Jul 2011 18:00:33 -0000	1.27
+++ kern/sched_bsd.c	17 Jan 2012 16:10:10 -0000
@@ -77,37 +77,23 @@
 
 	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
 
-	rrticks_init = hz / 10;
+	rrticks_init = hz / 50;
 	schedcpu(&schedcpu_to);
 }
 
 /*
- * Force switch among equal priority processes every 100ms.
+ * Force switch among equal priority processes every 20ms.
  */
 void
 roundrobin(struct cpu_info *ci)
 {
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
 
-	spc->spc_rrticks = rrticks_init;
-
 	if (ci->ci_curproc != NULL) {
-		if (spc->spc_schedflags & SPCF_SEENRR) {
-			/*
-			 * The process has already been through a roundrobin
-			 * without switching and may be hogging the CPU.
-			 * Indicate that the process should yield.
-			 */
-			atomic_setbits_int(&spc->spc_schedflags,
-			    SPCF_SHOULDYIELD);
-		} else {
-			atomic_setbits_int(&spc->spc_schedflags,
-			    SPCF_SEENRR);
-		}
+		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDYIELD);
 	}
 
-	if (spc->spc_nrun)
-		need_resched(ci);
+	need_resched(ci);
 }
 
 /*
@@ -204,7 +190,6 @@
 	struct timeout *to = (struct timeout *)arg;
 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
 	struct proc *p;
-	int s;
 	unsigned int newcpu;
 	int phz;
 
@@ -233,7 +218,6 @@
 		 */
 		if (p->p_slptime > 1)
 			continue;
-		SCHED_LOCK(s);
 		/*
 		 * p_pctcpu is only for ps.
 		 */
@@ -250,17 +234,6 @@
 		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
 		p->p_estcpu = newcpu;
 		resetpriority(p);
-		if (p->p_priority >= PUSER) {
-			if (p->p_stat == SRUN &&
-			    (p->p_priority / SCHED_PPQ) !=
-			    (p->p_usrpri / SCHED_PPQ)) {
-				remrunqueue(p);
-				p->p_priority = p->p_usrpri;
-				setrunqueue(p);
-			} else
-				p->p_priority = p->p_usrpri;
-		}
-		SCHED_UNLOCK(s);
 	}
 	uvm_meter();
 	wakeup(&lbolt);
@@ -278,8 +251,6 @@
 	unsigned int newcpu = p->p_estcpu;
 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
 
-	SCHED_ASSERT_LOCKED();
-
 	if (p->p_slptime > 5 * loadfac)
 		p->p_estcpu = 0;
 	else {
@@ -304,6 +275,7 @@
 	SCHED_LOCK(s);
 	p->p_priority = p->p_usrpri;
 	p->p_stat = SRUN;
+	generate_deadline(p, 1);
 	setrunqueue(p);
 	p->p_stats->p_ru.ru_nvcsw++;
 	mi_switch();
@@ -332,6 +304,7 @@
 	p->p_priority = p->p_usrpri;
 	p->p_stat = SRUN;
 	p->p_cpu = sched_choosecpu(p);
+	generate_deadline(p, 0);
 	setrunqueue(p);
 	p->p_stats->p_ru.ru_nivcsw++;
 	mi_switch();
@@ -372,14 +345,7 @@
 	 * process was running, and add that to its total so far.
 	 */
 	microuptime(&tv);
-	if (timercmp(&tv, &spc->spc_runtime, <)) {
-#if 0
-		printf("uptime is not monotonic! "
-		    "tv=%lu.%06lu, runtime=%lu.%06lu\n",
-		    tv.tv_sec, tv.tv_usec, spc->spc_runtime.tv_sec,
-		    spc->spc_runtime.tv_usec);
-#endif
-	} else {
+	if (timercmp(&tv, &spc->spc_runtime, >=)) {
 		timersub(&tv, &spc->spc_runtime, &tv);
 		timeradd(&p->p_rtime, &tv, &p->p_rtime);
 	}
@@ -403,7 +369,7 @@
 	 * Process is about to yield the CPU; clear the appropriate
 	 * scheduling flags.
 	 */
-	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
+	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SHOULDYIELD);
 
 	nextproc = sched_chooseproc();
 
@@ -435,9 +401,8 @@
 	 * be running on a new CPU now, so don't use the cache'd
 	 * schedstate_percpu pointer.
	 */
-	KASSERT(p->p_cpu == curcpu());
-
-	microuptime(&p->p_cpu->ci_schedstate.spc_runtime);
+	if (p->p_cpu == curcpu())
+		microuptime(&p->p_cpu->ci_schedstate.spc_runtime);
 
 #ifdef MULTIPROCESSOR
 	/*
@@ -515,30 +480,56 @@
 	}
 	p->p_stat = SRUN;
 	p->p_cpu = sched_choosecpu(p);
-	setrunqueue(p);
 	if (p->p_slptime > 1)
 		updatepri(p);
+	setrunqueue(p);
 	p->p_slptime = 0;
 	resched_proc(p, p->p_priority);
 }
 
 /*
  * Compute the priority of a process when running in user mode.
- * Arrange to reschedule if the resulting priority is better
- * than that of the current process.
  */
 void
 resetpriority(struct proc *p)
 {
-	unsigned int newpriority;
-
-	SCHED_ASSERT_LOCKED();
+	p->p_usrpri = min(((NZERO + (p->p_p->ps_nice))) << 1, MAXPRI);
+}
 
-	newpriority = PUSER + p->p_estcpu +
-	    NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
-	newpriority = min(newpriority, MAXPRI);
-	p->p_usrpri = newpriority;
-	resched_proc(p, p->p_usrpri);
+/*
+ * Generate a new virtual deadline for a process. The deadline is a soft
+ * one and has no purpose besides being used for choosing the next process
+ * to run. Also resets the number of round robin ticks available to the
+ * process if the previous timeslice expired and the process had to be preempted.
+ */
+void
+generate_deadline(struct proc *p, char voluntary) {
+	/*
+	 * For nice values between 0 and 39 inclusively, the offset lies between
+	 * 32 and 1280 milliseconds for a machine with hz=100. That means that
+	 * processes with nice value=0 (i.e. -20 in userland) will be executed
+	 * 32 milliseconds in the future at the latest. Processes with very
+	 * little priority will be executed 1.28 seconds in the future at the very
+	 * latest. The shift is done to ensure that the lowest possible offset is
+	 * larger than the timeslice, in order to make sure that the scheduler does
+	 * not degenerate to round robin behaviour when more than just a few processes
+	 * with high priority are started.
+	 *
+	 * If the process voluntarily yielded its CPU, we reward it by halving its
+	 * deadline offset.
+	 */
+	unsigned int offset_msec = ((p->p_p->ps_nice + 1) * rrticks_init) << (voluntary ? 2 : 3);
+	struct timeval offset = {
+		.tv_sec = offset_msec / 1000,
+		.tv_usec = (offset_msec % 1000) * 1000
+	};
+	struct timeval now;
+	microuptime(&now);
+	bcopy(&now, &(p->p_deadline_set), sizeof(struct timeval));
+
+	timeradd(&now, &offset, &(p->p_deadline));
+	if (!voluntary)
+		p->p_rrticks = rrticks_init;
 }
 
 /*
@@ -559,12 +550,8 @@
 void
 schedclock(struct proc *p)
 {
-	int s;
-
-	SCHED_LOCK(s);
 	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
 	resetpriority(p);
 	if (p->p_priority >= PUSER)
 		p->p_priority = p->p_usrpri;
-	SCHED_UNLOCK(s);
 }
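
For anyone who wants to eyeball what the new deadlines look like without booting a patched kernel, here is a small userland sketch (not part of the patch) that mirrors the offset computation in generate_deadline(). It assumes hz=100, so rrticks_init = hz / 50 = 2 as set in scheduler_start() above; deadline_offset() is just a made-up helper name for the demo, and the printed numbers are simply whatever the patch's formula yields under those assumptions.

/*
 * deadline_demo.c -- userland sketch only, NOT part of the patch.
 * Mirrors the offset computation in generate_deadline() so the
 * nice -> deadline-offset mapping can be inspected.
 */
#include <stdio.h>
#include <sys/time.h>

#define HZ		100
#define RRTICKS_INIT	(HZ / 50)

static struct timeval
deadline_offset(int ps_nice, int voluntary)
{
	/* Same formula as generate_deadline(); ps_nice is the kernel-side 0..39 value. */
	unsigned int offset_msec = ((ps_nice + 1) * RRTICKS_INIT) << (voluntary ? 2 : 3);
	struct timeval tv;

	tv.tv_sec = offset_msec / 1000;
	tv.tv_usec = (offset_msec % 1000) * 1000;	/* milliseconds -> microseconds */
	return (tv);
}

int
main(void)
{
	int nice;

	for (nice = 0; nice <= 39; nice += 13) {
		struct timeval vol = deadline_offset(nice, 1);
		struct timeval invol = deadline_offset(nice, 0);

		printf("nice %2d: voluntary %ld.%06ld s, involuntary %ld.%06ld s\n",
		    nice, (long)vol.tv_sec, (long)vol.tv_usec,
		    (long)invol.tv_sec, (long)invol.tv_usec);
	}
	return (0);
}

Since a voluntary yield gets half the offset of a preemption, a process that keeps yielding sorts ahead of a CPU hog with the same nice value the next time sched_chooseproc() pulls the minimum of the tree.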