Module Name:	src
Committed By:	ad
Date:		Sat Nov 23 19:42:52 UTC 2019
Modified Files:
	src/sys/compat/linux/common: linux_sched.c
	src/sys/kern: kern_exec.c kern_fork.c kern_idle.c kern_kthread.c
	    kern_lwp.c kern_runq.c kern_sleepq.c kern_softint.c kern_synch.c
	    sched_4bsd.c sys_aio.c sys_lwp.c
	src/sys/rump/librump/rumpkern: scheduler.c
	src/sys/sys: cpu.h lwp.h sched.h

Log Message:
Minor scheduler cleanup:

- Adapt to cpu_need_resched() changes. Avoid lost & duplicate IPIs and ASTs.
  sched_resched_cpu() and sched_resched_lwp() contain the logic for this.
- Changes for LSIDL to make the locking scheme match the intended design.
- Reduce lock contention and false sharing further.
- Numerous small bugfixes, including some corrections for SCHED_FIFO/RT.
- Use setrunnable() in more places, and merge cut & pasted code.


To generate a diff of this commit:
cvs rdiff -u -r1.72 -r1.73 src/sys/compat/linux/common/linux_sched.c
cvs rdiff -u -r1.483 -r1.484 src/sys/kern/kern_exec.c
cvs rdiff -u -r1.215 -r1.216 src/sys/kern/kern_fork.c
cvs rdiff -u -r1.25 -r1.26 src/sys/kern/kern_idle.c
cvs rdiff -u -r1.43 -r1.44 src/sys/kern/kern_kthread.c
cvs rdiff -u -r1.211 -r1.212 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.47 -r1.48 src/sys/kern/kern_runq.c
cvs rdiff -u -r1.52 -r1.53 src/sys/kern/kern_sleepq.c
cvs rdiff -u -r1.49 -r1.50 src/sys/kern/kern_softint.c
cvs rdiff -u -r1.325 -r1.326 src/sys/kern/kern_synch.c
cvs rdiff -u -r1.35 -r1.36 src/sys/kern/sched_4bsd.c
cvs rdiff -u -r1.44 -r1.45 src/sys/kern/sys_aio.c
cvs rdiff -u -r1.70 -r1.71 src/sys/kern/sys_lwp.c
cvs rdiff -u -r1.44 -r1.45 src/sys/rump/librump/rumpkern/scheduler.c
cvs rdiff -u -r1.43 -r1.44 src/sys/sys/cpu.h
cvs rdiff -u -r1.189 -r1.190 src/sys/sys/lwp.h
cvs rdiff -u -r1.76 -r1.77 src/sys/sys/sched.h

Please note that diffs are not public domain; they are subject to
the copyright notices on the relevant files.
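
The lost/duplicate IPI avoidance mentioned in the log message is handled in
sched_resched_cpu() in kern_runq.c (see the diff below): pending resched
flags are merged into ci_want_resched with a compare-and-swap loop, and the
IPI/AST is only triggered by the caller that actually raises the pending
preemption level.  The sketch below is a standalone userspace model of that
idea using C11 atomics; the names (model_cpu, model_resched, MODEL_UPREEMPT,
MODEL_KPREEMPT) are illustrative only, and the priority checks, remote-CPU
handling and kpreempt details of the real routine are omitted.

/*
 * Illustrative userspace model (C11 atomics) of the approach taken by
 * sched_resched_cpu() to avoid lost and duplicate IPIs/ASTs: resched
 * flags are merged with a CAS loop, and only the caller that raises the
 * pending preemption level sends a notification.  All names here are
 * hypothetical; this is not the kernel code.
 */
#include <stdatomic.h>
#include <stdio.h>

/* The kernel-preemption flag must compare greater than user preemption. */
#define	MODEL_UPREEMPT	0x1	/* user-level preemption wanted */
#define	MODEL_KPREEMPT	0x2	/* kernel preemption wanted (stronger) */

struct model_cpu {
	atomic_uint	want_resched;	/* pending resched flags */
	unsigned	ipis_sent;	/* notifications actually sent */
};

/* Request a reschedule at level "f", sending at most one notification. */
static void
model_resched(struct model_cpu *ci, unsigned f)
{
	unsigned o = 0;		/* start by assuming nothing is pending */

	for (;;) {
		if (atomic_compare_exchange_strong(&ci->want_resched,
		    &o, o | f)) {
			/* We raised the pending level: notify exactly once. */
			ci->ipis_sent++;
			return;
		}
		/* CAS failed: "o" now holds what is really pending. */
		if ((o & (MODEL_KPREEMPT | MODEL_UPREEMPT)) >=
		    (f & (MODEL_KPREEMPT | MODEL_UPREEMPT))) {
			/* Equal or stronger request already in progress. */
			return;
		}
		/* A weaker request is pending: merge ours in and retry. */
	}
}

int
main(void)
{
	struct model_cpu cpu = { .want_resched = 0, .ipis_sent = 0 };

	model_resched(&cpu, MODEL_UPREEMPT);	/* first request: notifies */
	model_resched(&cpu, MODEL_UPREEMPT);	/* duplicate: no second IPI */
	model_resched(&cpu, MODEL_KPREEMPT);	/* stronger: notifies again */

	printf("pending flags 0x%x, notifications %u\n",
	    (unsigned)atomic_load(&cpu.want_resched), cpu.ipis_sent);
	return 0;
}

Built as an ordinary C program this prints "pending flags 0x3,
notifications 2": the duplicate user-preemption request produces no second
notification, while upgrading to kernel preemption does.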
Modified files: Index: src/sys/compat/linux/common/linux_sched.c diff -u src/sys/compat/linux/common/linux_sched.c:1.72 src/sys/compat/linux/common/linux_sched.c:1.73 --- src/sys/compat/linux/common/linux_sched.c:1.72 Thu Oct 3 22:16:53 2019 +++ src/sys/compat/linux/common/linux_sched.c Sat Nov 23 19:42:52 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: linux_sched.c,v 1.72 2019/10/03 22:16:53 kamil Exp $ */ +/* $NetBSD: linux_sched.c,v 1.73 2019/11/23 19:42:52 ad Exp $ */ /*- - * Copyright (c) 1999 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.72 2019/10/03 22:16:53 kamil Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.73 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/mount.h> @@ -180,7 +180,6 @@ linux_clone_nptl(struct lwp *l, const st struct lwp *l2; struct linux_emuldata *led; void *parent_tidptr, *tls, *child_tidptr; - struct schedstate_percpu *spc; vaddr_t uaddr; lwpid_t lid; int flags, tnprocs, error; @@ -248,31 +247,8 @@ linux_clone_nptl(struct lwp *l, const st } } - /* - * Set the new LWP running, unless the process is stopping, - * then the LWP is created stopped. - */ - mutex_enter(p->p_lock); - lwp_lock(l2); - spc = &l2->l_cpu->ci_schedstate; - if ((l->l_flag & (LW_WREBOOT | LW_DBGSUSPEND | LW_WSUSPEND | LW_WEXIT)) == 0) { - if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { - KASSERT(l2->l_wchan == NULL); - l2->l_stat = LSSTOP; - p->p_nrlwps--; - lwp_unlock_to(l2, spc->spc_lwplock); - } else { - KASSERT(lwp_locked(l2, spc->spc_mutex)); - l2->l_stat = LSRUN; - sched_enqueue(l2, false); - lwp_unlock(l2); - } - } else { - l2->l_stat = LSSUSPENDED; - p->p_nrlwps--; - lwp_unlock_to(l2, spc->spc_lwplock); - } - mutex_exit(p->p_lock); + /* Set the new LWP running. */ + lwp_start(l2, 0); retval[0] = lid; retval[1] = 0; Index: src/sys/kern/kern_exec.c diff -u src/sys/kern/kern_exec.c:1.483 src/sys/kern/kern_exec.c:1.484 --- src/sys/kern/kern_exec.c:1.483 Sat Oct 12 10:55:23 2019 +++ src/sys/kern/kern_exec.c Sat Nov 23 19:42:52 2019 @@ -1,9 +1,12 @@ -/* $NetBSD: kern_exec.c,v 1.483 2019/10/12 10:55:23 kamil Exp $ */ +/* $NetBSD: kern_exec.c,v 1.484 2019/11/23 19:42:52 ad Exp $ */ /*- - * Copyright (c) 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. * All rights reserved. * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. 
+ * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -59,7 +62,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.483 2019/10/12 10:55:23 kamil Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.484 2019/11/23 19:42:52 ad Exp $"); #include "opt_exec.h" #include "opt_execfmt.h" @@ -2650,11 +2653,11 @@ do_posix_spawn(struct lwp *l1, pid_t *pi lwp_lock(l2); KASSERT(p2->p_nrlwps == 1); + KASSERT(l2->l_stat == LSIDL); p2->p_nrlwps = 1; p2->p_stat = SACTIVE; - l2->l_stat = LSRUN; - sched_enqueue(l2, false); - lwp_unlock(l2); + setrunnable(l2); + /* LWP now unlocked */ mutex_exit(p2->p_lock); mutex_exit(proc_lock); Index: src/sys/kern/kern_fork.c diff -u src/sys/kern/kern_fork.c:1.215 src/sys/kern/kern_fork.c:1.216 --- src/sys/kern/kern_fork.c:1.215 Sat Oct 12 10:55:23 2019 +++ src/sys/kern/kern_fork.c Sat Nov 23 19:42:52 2019 @@ -1,7 +1,8 @@ -/* $NetBSD: kern_fork.c,v 1.215 2019/10/12 10:55:23 kamil Exp $ */ +/* $NetBSD: kern_fork.c,v 1.216 2019/11/23 19:42:52 ad Exp $ */ /*- - * Copyright (c) 1999, 2001, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2001, 2004, 2006, 2007, 2008, 2019 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -67,7 +68,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.215 2019/10/12 10:55:23 kamil Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.216 2019/11/23 19:42:52 ad Exp $"); #include "opt_ktrace.h" #include "opt_dtrace.h" @@ -561,21 +562,20 @@ fork1(struct lwp *l1, int flags, int exi p2->p_acflag = AFORK; lwp_lock(l2); KASSERT(p2->p_nrlwps == 1); + KASSERT(l2->l_stat == LSIDL); if (p2->p_sflag & PS_STOPFORK) { - struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate; p2->p_nrlwps = 0; p2->p_stat = SSTOP; p2->p_waited = 0; p1->p_nstopchild++; l2->l_stat = LSSTOP; KASSERT(l2->l_wchan == NULL); - lwp_unlock_to(l2, spc->spc_lwplock); + lwp_unlock(l2); } else { p2->p_nrlwps = 1; p2->p_stat = SACTIVE; - l2->l_stat = LSRUN; - sched_enqueue(l2, false); - lwp_unlock(l2); + setrunnable(l2); + /* LWP now unlocked */ } /* Index: src/sys/kern/kern_idle.c diff -u src/sys/kern/kern_idle.c:1.25 src/sys/kern/kern_idle.c:1.26 --- src/sys/kern/kern_idle.c:1.25 Sun Jan 29 22:55:40 2012 +++ src/sys/kern/kern_idle.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_idle.c,v 1.25 2012/01/29 22:55:40 rmind Exp $ */ +/* $NetBSD: kern_idle.c,v 1.26 2019/11/23 19:42:52 ad Exp $ */ /*- * Copyright (c)2002, 2006, 2007 YAMAMOTO Takashi, @@ -28,7 +28,7 @@ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_idle.c,v 1.25 2012/01/29 22:55:40 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_idle.c,v 1.26 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/cpu.h> @@ -50,26 +50,26 @@ idle_loop(void *dummy) struct lwp *l = curlwp; kcpuset_atomic_set(kcpuset_running, cpu_index(ci)); + spc = &ci->ci_schedstate; ci->ci_data.cpu_onproc = l; /* Update start time for this thread. */ lwp_lock(l); + KASSERT(lwp_locked(l, spc->spc_lwplock)); binuptime(&l->l_stime); + spc->spc_flags |= SPCF_RUNNING; + l->l_stat = LSONPROC; + l->l_pflag |= LP_RUNNING; lwp_unlock(l); /* * Use spl0() here to ensure that we have the correct interrupt * priority. This may be the first thread running on the CPU, - * in which case we took a dirtbag route to get here. + * in which case we took an odd route to get here. 
*/ - spc = &ci->ci_schedstate; - (void)splsched(); - spc->spc_flags |= SPCF_RUNNING; spl0(); - KERNEL_UNLOCK_ALL(l, NULL); - l->l_stat = LSONPROC; - l->l_pflag |= LP_RUNNING; + for (;;) { LOCKDEBUG_BARRIER(NULL, 0); KASSERT((l->l_flag & LW_IDLE) != 0); @@ -113,7 +113,6 @@ create_idle_lwp(struct cpu_info *ci) lwp_lock(l); l->l_flag |= LW_IDLE; lwp_unlock(l); - l->l_cpu = ci; ci->ci_data.cpu_idlelwp = l; return error; Index: src/sys/kern/kern_kthread.c diff -u src/sys/kern/kern_kthread.c:1.43 src/sys/kern/kern_kthread.c:1.44 --- src/sys/kern/kern_kthread.c:1.43 Tue Jan 9 22:58:45 2018 +++ src/sys/kern/kern_kthread.c Sat Nov 23 19:42:52 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_kthread.c,v 1.43 2018/01/09 22:58:45 pgoyette Exp $ */ +/* $NetBSD: kern_kthread.c,v 1.44 2019/11/23 19:42:52 ad Exp $ */ /*- - * Copyright (c) 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. + * Copyright (c) 1998, 1999, 2007, 2009, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.43 2018/01/09 22:58:45 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_kthread.c,v 1.44 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -108,10 +108,10 @@ kthread_create(pri_t pri, int flag, stru } mutex_enter(proc0.p_lock); lwp_lock(l); - l->l_priority = pri; + lwp_changepri(l, pri); if (ci != NULL) { if (ci != l->l_cpu) { - lwp_unlock_to(l, ci->ci_schedstate.spc_mutex); + lwp_unlock_to(l, ci->ci_schedstate.spc_lwplock); lwp_lock(l); } l->l_pflag |= LP_BOUND; @@ -133,15 +133,12 @@ kthread_create(pri_t pri, int flag, stru * Set the new LWP running, unless the caller has requested * otherwise. */ + KASSERT(l->l_stat == LSIDL); if ((flag & KTHREAD_IDLE) == 0) { - l->l_stat = LSRUN; - sched_enqueue(l, false); - lwp_unlock(l); + setrunnable(l); + /* LWP now unlocked */ } else { - if (ci != NULL) - lwp_unlock_to(l, ci->ci_schedstate.spc_lwplock); - else - lwp_unlock(l); + lwp_unlock(l); } mutex_exit(proc0.p_lock); Index: src/sys/kern/kern_lwp.c diff -u src/sys/kern/kern_lwp.c:1.211 src/sys/kern/kern_lwp.c:1.212 --- src/sys/kern/kern_lwp.c:1.211 Thu Nov 21 19:47:21 2019 +++ src/sys/kern/kern_lwp.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.211 2019/11/21 19:47:21 ad Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.212 2019/11/23 19:42:52 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. @@ -161,22 +161,23 @@ * * States and their associated locks: * - * LSONPROC, LSZOMB: + * LSIDL, LSONPROC, LSZOMB, LSSUPENDED: * - * Always covered by spc_lwplock, which protects running LWPs. - * This is a per-CPU lock and matches lwp::l_cpu. + * Always covered by spc_lwplock, which protects LWPs not + * associated with any other sync object. This is a per-CPU + * lock and matches lwp::l_cpu. * - * LSIDL, LSRUN: + * LSRUN: * * Always covered by spc_mutex, which protects the run queues. * This is a per-CPU lock and matches lwp::l_cpu. * * LSSLEEP: * - * Covered by a lock associated with the sleep queue that the - * LWP resides on. Matches lwp::l_sleepq::sq_mutex. + * Covered by a lock associated with the sleep queue (sometimes + * a turnstile sleep queue) that the LWP resides on. * - * LSSTOP, LSSUSPENDED: + * LSSTOP: * * If the LWP was previously sleeping (l_wchan != NULL), then * l_mutex references the sleep queue lock. 
If the LWP was @@ -185,10 +186,7 @@ * * The lock order is as follows: * - * spc::spc_lwplock -> - * sleeptab::st_mutex -> - * tschain_t::tc_mutex -> - * spc::spc_mutex + * sleepq -> turnstile -> spc_lwplock -> spc_mutex * * Each process has an scheduler state lock (proc::p_lock), and a * number of counters on LWPs and their states: p_nzlwps, p_nrlwps, and @@ -199,7 +197,7 @@ * LSIDL, LSZOMB, LSSTOP, LSSUSPENDED * * (But not always for kernel threads. There are some special cases - * as mentioned above. See kern_softint.c.) + * as mentioned above: soft interrupts, and the idle loops.) * * Note that an LWP is considered running or likely to run soon if in * one of the following states. This affects the value of p_nrlwps: @@ -211,7 +209,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.211 2019/11/21 19:47:21 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.212 2019/11/23 19:42:52 ad Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -841,7 +839,7 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_ l2->l_inheritedprio = -1; l2->l_protectprio = -1; l2->l_auxprio = -1; - l2->l_flag = 0; + l2->l_flag = (l1->l_flag & (LW_WEXIT | LW_WREBOOT | LW_WCORE)); l2->l_pflag = LP_MPSAFE; TAILQ_INIT(&l2->l_ld_locks); l2->l_psrefs = 0; @@ -874,7 +872,7 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_ } kpreempt_disable(); - l2->l_mutex = l1->l_cpu->ci_schedstate.spc_mutex; + l2->l_mutex = l1->l_cpu->ci_schedstate.spc_lwplock; l2->l_cpu = l1->l_cpu; kpreempt_enable(); @@ -984,6 +982,35 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_ } /* + * Set a new LWP running. If the process is stopping, then the LWP is + * created stopped. + */ +void +lwp_start(lwp_t *l, int flags) +{ + proc_t *p = l->l_proc; + + mutex_enter(p->p_lock); + lwp_lock(l); + KASSERT(l->l_stat == LSIDL); + if ((flags & LWP_SUSPENDED) != 0) { + /* It'll suspend itself in lwp_userret(). */ + l->l_flag |= LW_WSUSPEND; + } + if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { + KASSERT(l->l_wchan == NULL); + l->l_stat = LSSTOP; + p->p_nrlwps--; + lwp_unlock(l); + } else { + l->l_cpu = curcpu(); + setrunnable(l); + /* LWP now unlocked */ + } + mutex_exit(p->p_lock); +} + +/* * Called by MD code when a new LWP begins execution. Must be called * with the previous LWP locked (so at splsched), or if there is no * previous LWP, at splsched. @@ -1345,13 +1372,10 @@ lwp_migrate(lwp_t *l, struct cpu_info *t case LSRUN: l->l_target_cpu = tci; break; - case LSIDL: - l->l_cpu = tci; - lwp_unlock_to(l, tspc->spc_mutex); - return; case LSSLEEP: l->l_cpu = tci; break; + case LSIDL: case LSSTOP: case LSSUSPENDED: l->l_cpu = tci; @@ -1363,8 +1387,8 @@ lwp_migrate(lwp_t *l, struct cpu_info *t case LSONPROC: l->l_target_cpu = tci; spc_lock(l->l_cpu); - cpu_need_resched(l->l_cpu, RESCHED_KPREEMPT); - spc_unlock(l->l_cpu); + sched_resched_cpu(l->l_cpu, PRI_USER_RT, true); + /* spc now unlocked */ break; } lwp_unlock(l); Index: src/sys/kern/kern_runq.c diff -u src/sys/kern/kern_runq.c:1.47 src/sys/kern/kern_runq.c:1.48 --- src/sys/kern/kern_runq.c:1.47 Thu Jun 1 02:45:13 2017 +++ src/sys/kern/kern_runq.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,33 @@ -/* $NetBSD: kern_runq.c,v 1.47 2017/06/01 02:45:13 chs Exp $ */ +/* $NetBSD: kern_runq.c,v 1.48 2019/11/23 19:42:52 ad Exp $ */ + +/*- + * Copyright (c) 2019 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Andrew Doran. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ /* * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org> @@ -27,7 +56,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.47 2017/06/01 02:45:13 chs Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.48 2019/11/23 19:42:52 ad Exp $"); #include "opt_dtrace.h" @@ -101,7 +130,6 @@ static void sched_balance(void *); /* * Preemption control. */ -int sched_upreempt_pri = 0; #ifdef __HAVE_PREEMPTION # ifdef DEBUG int sched_kpreempt_pri = 0; @@ -209,27 +237,24 @@ sched_getrq(runqueue_t *ci_rq, const pri &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head; } +/* + * Put an LWP onto a run queue. The LWP must be locked by spc_mutex for + * l_cpu. + */ void -sched_enqueue(struct lwp *l, bool swtch) +sched_enqueue(struct lwp *l) { runqueue_t *ci_rq; struct schedstate_percpu *spc; TAILQ_HEAD(, lwp) *q_head; const pri_t eprio = lwp_eprio(l); struct cpu_info *ci; - int type; ci = l->l_cpu; spc = &ci->ci_schedstate; ci_rq = spc->spc_sched_info; KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex)); - /* Update the last run time on switch */ - if (__predict_true(swtch == true)) - l->l_rticksum += (hardclock_ticks - l->l_rticks); - else if (l->l_rticks == 0) - l->l_rticks = hardclock_ticks; - /* Enqueue the thread */ q_head = sched_getrq(ci_rq, eprio); if (TAILQ_EMPTY(q_head)) { @@ -242,7 +267,12 @@ sched_enqueue(struct lwp *l, bool swtch) KASSERT((ci_rq->r_bitmap[i] & q) == 0); ci_rq->r_bitmap[i] |= q; } - TAILQ_INSERT_TAIL(q_head, l, l_runq); + /* Preempted SCHED_RR and SCHED_FIFO LWPs go to the queue head. */ + if (l->l_class != SCHED_OTHER && (l->l_pflag & LP_PREEMPTING) != 0) { + TAILQ_INSERT_HEAD(q_head, l, l_runq); + } else { + TAILQ_INSERT_TAIL(q_head, l, l_runq); + } ci_rq->r_count++; if ((l->l_pflag & LP_BOUND) == 0) ci_rq->r_mcount++; @@ -255,23 +285,12 @@ sched_enqueue(struct lwp *l, bool swtch) spc->spc_maxpriority = eprio; sched_newts(l); - - /* - * Wake the chosen CPU or cause a preemption if the newly - * enqueued thread has higher priority. Don't cause a - * preemption if the thread is yielding (swtch). 
- */ - if (!swtch && eprio > spc->spc_curpriority) { - if (eprio >= sched_kpreempt_pri) - type = RESCHED_KPREEMPT; - else if (eprio >= sched_upreempt_pri) - type = RESCHED_IMMED; - else - type = RESCHED_LAZY; - cpu_need_resched(ci, type); - } } +/* + * Remove and LWP from the run queue it's on. The LWP must be in state + * LSRUN. + */ void sched_dequeue(struct lwp *l) { @@ -329,6 +348,121 @@ sched_dequeue(struct lwp *l) } /* + * Cause a preemption on the given CPU, if the priority "pri" is higher + * priority than the running LWP. If "unlock" is specified, and ideally it + * will be for concurrency reasons, spc_mutex will be dropped before return. + */ +void +sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock) +{ + struct schedstate_percpu *spc; + u_int o, n, f; + lwp_t *l; + + spc = &ci->ci_schedstate; + + KASSERT(mutex_owned(spc->spc_mutex)); + + /* + * If the priority level we're evaluating wouldn't cause a new LWP + * to be run on the CPU, then we have nothing to do. + */ + if (pri <= spc->spc_curpriority) { + if (__predict_true(unlock)) { + spc_unlock(ci); + } + return; + } + + /* + * Figure out what kind of preemption we should do. + */ + l = ci->ci_data.cpu_onproc; + if ((l->l_flag & LW_IDLE) != 0) { + f = RESCHED_IDLE | RESCHED_UPREEMPT; + } else if ((l->l_pflag & LP_INTR) != 0) { + /* We can't currently preempt interrupt LWPs - should do. */ + if (__predict_true(unlock)) { + spc_unlock(ci); + } + return; + } else if (pri >= sched_kpreempt_pri) { +#ifdef __HAVE_PREEMPTION + f = RESCHED_KPREEMPT; +#else + /* Leave door open for test: set kpreempt_pri with sysctl. */ + f = RESCHED_UPREEMPT; +#endif + /* + * l_dopreempt must be set with the CPU locked to sync with + * mi_switch(). It must also be set with an atomic to sync + * with kpreempt(). + */ + atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE); + } else { + f = RESCHED_UPREEMPT; + } + if (ci != curcpu()) { + f |= RESCHED_REMOTE; + } + + /* + * Things start as soon as we touch ci_want_resched: x86 for example + * has an instruction that monitors the memory cell it's in. We + * want to drop the schedstate lock in advance, otherwise the remote + * CPU can awaken and immediately block on the lock. + */ + if (__predict_true(unlock)) { + spc_unlock(ci); + } + + /* + * The caller will always have a second scheduler lock held: either + * the running LWP lock (spc_lwplock), or a sleep queue lock. That + * keeps preemption disabled, which among other things ensures all + * LWPs involved won't be freed while we're here (see lwp_dtor()). + */ + KASSERT(kpreempt_disabled()); + + for (o = 0;; o = n) { + n = atomic_cas_uint(&ci->ci_want_resched, o, o | f); + if (__predict_true(o == n)) { + /* + * We're the first. If we're in process context on + * the same CPU, we can avoid the visit to trap(). + */ + if (l != curlwp || cpu_intr_p()) { + cpu_need_resched(ci, l, f); + } + break; + } + if (__predict_true( + (n & (RESCHED_KPREEMPT|RESCHED_UPREEMPT)) >= + (f & (RESCHED_KPREEMPT|RESCHED_UPREEMPT)))) { + /* Already in progress, nothing to do. */ + break; + } + } +} + +/* + * Cause a preemption on the given CPU, if the priority of LWP "l" in state + * LSRUN, is higher priority than the running LWP. If "unlock" is + * specified, and ideally it will be for concurrency reasons, spc_mutex will + * be dropped before return. 
+ */ +void +sched_resched_lwp(struct lwp *l, bool unlock) +{ + struct cpu_info *ci = l->l_cpu; + + KASSERT(lwp_locked(l, ci->ci_schedstate.spc_mutex)); + KASSERT(l->l_stat == LSRUN); + + sched_resched_cpu(ci, lwp_eprio(l), unlock); +} + +/* * Migration and balancing. */ @@ -385,6 +519,7 @@ sched_takecpu(struct lwp *l) spc = &ci->ci_schedstate; ci_rq = spc->spc_sched_info; + eprio = lwp_eprio(l); /* Make sure that thread is in appropriate processor-set */ if (__predict_true(spc->spc_psid == l->l_psid)) { @@ -393,15 +528,22 @@ sched_takecpu(struct lwp *l) ci_rq->r_ev_stay.ev_count++; return ci; } + /* + * New LWPs must start on the same CPU as the parent (l_cpu + * was inherited when the LWP was created). Doing otherwise + * is bad for performance and repeatability, and agitates + * buggy programs. Also, we want the child to have a good + * chance of reusing the VM context from the parent. + */ + if (l->l_stat == LSIDL) { + ci_rq->r_ev_stay.ev_count++; + return ci; + } /* Stay if thread is cache-hot */ - eprio = lwp_eprio(l); - if (__predict_true(l->l_stat != LSIDL) && - lwp_cache_hot(l) && eprio >= spc->spc_curpriority) { + if (lwp_cache_hot(l) && eprio >= spc->spc_curpriority) { ci_rq->r_ev_stay.ev_count++; return ci; } - } else { - eprio = lwp_eprio(l); } /* Run on current CPU if priority of thread is higher */ @@ -507,7 +649,7 @@ sched_catchlwp(struct cpu_info *ci) l->l_cpu = curci; ci_rq->r_ev_pull.ev_count++; lwp_unlock_to(l, curspc->spc_mutex); - sched_enqueue(l, false); + sched_enqueue(l); return l; } spc_unlock(ci); @@ -569,7 +711,7 @@ sched_idle(void) { struct cpu_info *ci = curcpu(), *tci = NULL; struct schedstate_percpu *spc, *tspc; - runqueue_t *ci_rq; + runqueue_t *ci_rq, *tci_rq; bool dlock = false; /* Check if there is a migrating LWP */ @@ -631,8 +773,11 @@ sched_idle(void) sched_dequeue(l); l->l_cpu = tci; lwp_setlock(l, tspc->spc_mutex); - sched_enqueue(l, false); - break; + sched_enqueue(l); + sched_resched_lwp(l, true); + /* tci now unlocked */ + spc_unlock(ci); + goto no_migration; } if (dlock == true) { KASSERT(tci != NULL); @@ -653,9 +798,13 @@ no_migration: tspc = &tci->ci_schedstate; if (ci == tci || spc->spc_psid != tspc->spc_psid) return; - spc_dlock(ci, tci); - (void)sched_catchlwp(tci); - spc_unlock(ci); + /* Don't hit the locks unless there's something to do. 
*/ + tci_rq = tci->ci_schedstate.spc_sched_info; + if (tci_rq->r_mcount >= min_catch) { + spc_dlock(ci, tci); + (void)sched_catchlwp(tci); + spc_unlock(ci); + } } #else @@ -746,6 +895,10 @@ sched_nextlwp(void) runqueue_t *ci_rq; struct lwp *l; + /* Update the last run time on switch */ + l = curlwp; + l->l_rticksum += (hardclock_ticks - l->l_rticks); + /* Return to idle LWP if there is a migrating thread */ spc = &ci->ci_schedstate; if (__predict_false(spc->spc_migrating != NULL)) @@ -873,12 +1026,6 @@ SYSCTL_SETUP(sysctl_sched_setup, "sysctl SYSCTL_DESCR("Minimum priority to trigger kernel preemption"), NULL, 0, &sched_kpreempt_pri, 0, CTL_CREATE, CTL_EOL); - sysctl_createv(clog, 0, &node, NULL, - CTLFLAG_PERMANENT | CTLFLAG_READWRITE, - CTLTYPE_INT, "upreempt_pri", - SYSCTL_DESCR("Minimum priority to trigger user preemption"), - NULL, 0, &sched_upreempt_pri, 0, - CTL_CREATE, CTL_EOL); } /* Index: src/sys/kern/kern_sleepq.c diff -u src/sys/kern/kern_sleepq.c:1.52 src/sys/kern/kern_sleepq.c:1.53 --- src/sys/kern/kern_sleepq.c:1.52 Thu Nov 21 18:56:55 2019 +++ src/sys/kern/kern_sleepq.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $ */ +/* $NetBSD: kern_sleepq.c,v 1.53 2019/11/23 19:42:52 ad Exp $ */ /*- * Copyright (c) 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.52 2019/11/21 18:56:55 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.53 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/kernel.h> @@ -156,8 +156,9 @@ sleepq_remove(sleepq_t *sq, lwp_t *l) sched_setrunnable(l); l->l_stat = LSRUN; l->l_slptime = 0; - sched_enqueue(l, false); - spc_unlock(ci); + sched_enqueue(l); + sched_resched_lwp(l, true); + /* LWP & SPC now unlocked, but we still hold sleep queue lock. */ } /* Index: src/sys/kern/kern_softint.c diff -u src/sys/kern/kern_softint.c:1.49 src/sys/kern/kern_softint.c:1.50 --- src/sys/kern/kern_softint.c:1.49 Thu Nov 21 17:50:49 2019 +++ src/sys/kern/kern_softint.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_softint.c,v 1.49 2019/11/21 17:50:49 ad Exp $ */ +/* $NetBSD: kern_softint.c,v 1.50 2019/11/23 19:42:52 ad Exp $ */ /*- * Copyright (c) 2007, 2008, 2019 The NetBSD Foundation, Inc. @@ -170,7 +170,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.49 2019/11/21 17:50:49 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_softint.c,v 1.50 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/proc.h> @@ -661,19 +661,20 @@ schednetisr(int isr) void softint_init_md(lwp_t *l, u_int level, uintptr_t *machdep) { + struct proc *p; softint_t *si; *machdep = (1 << level); si = l->l_private; + p = l->l_proc; - lwp_lock(l); - lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex); + mutex_enter(p->p_lock); lwp_lock(l); /* Cheat and make the KASSERT in softint_thread() happy. 
*/ si->si_active = 1; - l->l_stat = LSRUN; - sched_enqueue(l, false); - lwp_unlock(l); + setrunnable(l); + /* LWP now unlocked */ + mutex_exit(p->p_lock); } /* @@ -692,10 +693,10 @@ softint_trigger(uintptr_t machdep) ci = l->l_cpu; ci->ci_data.cpu_softints |= machdep; if (l == ci->ci_data.cpu_idlelwp) { - cpu_need_resched(ci, 0); + atomic_or_uint(&ci->ci_want_resched, RESCHED_UPREEMPT); } else { /* MI equivalent of aston() */ - cpu_signotify(l); + lwp_need_userret(l); } } Index: src/sys/kern/kern_synch.c diff -u src/sys/kern/kern_synch.c:1.325 src/sys/kern/kern_synch.c:1.326 --- src/sys/kern/kern_synch.c:1.325 Thu Nov 21 20:51:05 2019 +++ src/sys/kern/kern_synch.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_synch.c,v 1.325 2019/11/21 20:51:05 ad Exp $ */ +/* $NetBSD: kern_synch.c,v 1.326 2019/11/23 19:42:52 ad Exp $ */ /*- * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019 @@ -69,7 +69,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.325 2019/11/21 20:51:05 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.326 2019/11/23 19:42:52 ad Exp $"); #include "opt_kstack.h" #include "opt_dtrace.h" @@ -104,7 +104,6 @@ dtrace_vtime_switch_func_t dtrace_v static void sched_unsleep(struct lwp *, bool); static void sched_changepri(struct lwp *, pri_t); static void sched_lendpri(struct lwp *, pri_t); -static void resched_cpu(struct lwp *); syncobj_t sleep_syncobj = { .sobj_flag = SOBJ_SLEEPQ_SORTED, @@ -303,10 +302,10 @@ preempt(void) * * Character addresses for lockstat only. */ -static char in_critical_section; +static char kpreempt_is_disabled; static char kernel_lock_held; -static char is_softint; -static char cpu_kpreempt_enter_fail; +static char is_softint_lwp; +static char spl_is_raised; bool kpreempt(uintptr_t where) @@ -338,13 +337,13 @@ kpreempt(uintptr_t where) if ((dop & DOPREEMPT_COUNTED) == 0) { kpreempt_ev_crit.ev_count++; } - failed = (uintptr_t)&in_critical_section; + failed = (uintptr_t)&kpreempt_is_disabled; break; } if (__predict_false((l->l_pflag & LP_INTR) != 0)) { /* Can't preempt soft interrupts yet. */ atomic_swap_uint(&l->l_dopreempt, 0); - failed = (uintptr_t)&is_softint; + failed = (uintptr_t)&is_softint_lwp; break; } s = splsched(); @@ -365,7 +364,7 @@ kpreempt(uintptr_t where) * interrupt to retry later. */ splx(s); - failed = (uintptr_t)&cpu_kpreempt_enter_fail; + failed = (uintptr_t)&spl_is_raised; break; } /* Do it! */ @@ -373,6 +372,7 @@ kpreempt(uintptr_t where) kpreempt_ev_immed.ev_count++; } lwp_lock(l); + l->l_pflag |= LP_PREEMPTING; mi_switch(l); l->l_nopreempt++; splx(s); @@ -555,13 +555,6 @@ mi_switch(lwp_t *l) } #endif /* !__HAVE_FAST_SOFTINTS */ - /* Count time spent in current system call */ - if (!returning) { - SYSCALL_TIME_SLEEP(l); - - updatertime(l, &bt); - } - /* Lock the runqueue */ KASSERT(l->l_stat != LSRUN); mutex_spin_enter(spc->spc_mutex); @@ -574,7 +567,7 @@ mi_switch(lwp_t *l) if ((l->l_flag & LW_IDLE) == 0) { l->l_stat = LSRUN; lwp_setlock(l, spc->spc_mutex); - sched_enqueue(l, true); + sched_enqueue(l); /* * Handle migration. Note that "migrating LWP" may * be reset here, if interrupt/preemption happens @@ -596,6 +589,11 @@ mi_switch(lwp_t *l) /* Items that must be updated with the CPU locked. */ if (!returning) { + /* Count time spent in current system call */ + SYSCALL_TIME_SLEEP(l); + + updatertime(l, &bt); + /* Update the new LWP's start time. 
*/ newl->l_stime = bt; @@ -656,9 +654,8 @@ mi_switch(lwp_t *l) l->l_ncsw++; if ((l->l_pflag & LP_PREEMPTING) != 0) l->l_nivcsw++; - l->l_pflag &= ~LP_PREEMPTING; KASSERT((l->l_pflag & LP_RUNNING) != 0); - l->l_pflag &= ~LP_RUNNING; + l->l_pflag &= ~(LP_RUNNING | LP_PREEMPTING); /* * Increase the count of spin-mutexes before the release @@ -882,6 +879,7 @@ setrunnable(struct lwp *l) { struct proc *p = l->l_proc; struct cpu_info *ci; + kmutex_t *oldlock; KASSERT((l->l_flag & LW_IDLE) == 0); KASSERT((l->l_flag & LW_DBGSUSPEND) == 0); @@ -900,6 +898,7 @@ setrunnable(struct lwp *l) p->p_nrlwps++; break; case LSSUSPENDED: + KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); l->l_flag &= ~LW_WSUSPEND; p->p_nrlwps++; cv_broadcast(&p->p_lwpcv); @@ -907,6 +906,9 @@ setrunnable(struct lwp *l) case LSSLEEP: KASSERT(l->l_wchan != NULL); break; + case LSIDL: + KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); + break; default: panic("setrunnable: lwp %p state was %d", l, l->l_stat); } @@ -939,14 +941,14 @@ setrunnable(struct lwp *l) ci = sched_takecpu(l); l->l_cpu = ci; spc_lock(ci); - lwp_unlock_to(l, ci->ci_schedstate.spc_mutex); + oldlock = lwp_setlock(l, l->l_cpu->ci_schedstate.spc_mutex); sched_setrunnable(l); l->l_stat = LSRUN; l->l_slptime = 0; - - sched_enqueue(l, false); - resched_cpu(l); - lwp_unlock(l); + sched_enqueue(l); + sched_resched_lwp(l, true); + /* SPC & LWP now unlocked. */ + mutex_spin_exit(oldlock); } /* @@ -1012,13 +1014,19 @@ suspendsched(void) /* * Kick all CPUs to make them preempt any LWPs running in user mode. - * They'll trap into the kernel and suspend themselves in userret(). + * They'll trap into the kernel and suspend themselves in userret(). + * + * Unusually, we don't hold any other scheduler object locked, which + * would keep preemption off for sched_resched_cpu(), so disable it + * explicitly. */ + kpreempt_disable(); for (CPU_INFO_FOREACH(cii, ci)) { spc_lock(ci); - cpu_need_resched(ci, RESCHED_IMMED); - spc_unlock(ci); + sched_resched_cpu(ci, PRI_KERNEL, true); + /* spc now unlocked */ } + kpreempt_enable(); } /* @@ -1037,49 +1045,64 @@ sched_unsleep(struct lwp *l, bool cleanu } static void -resched_cpu(struct lwp *l) -{ - struct cpu_info *ci = l->l_cpu; - - KASSERT(lwp_locked(l, NULL)); - if (lwp_eprio(l) > ci->ci_schedstate.spc_curpriority) - cpu_need_resched(ci, 0); -} - -static void sched_changepri(struct lwp *l, pri_t pri) { + struct schedstate_percpu *spc; + struct cpu_info *ci; KASSERT(lwp_locked(l, NULL)); + ci = l->l_cpu; + spc = &ci->ci_schedstate; + if (l->l_stat == LSRUN) { - KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex)); + KASSERT(lwp_locked(l, spc->spc_mutex)); sched_dequeue(l); l->l_priority = pri; - sched_enqueue(l, false); + sched_enqueue(l); + sched_resched_lwp(l, false); + } else if (l->l_stat == LSONPROC && l->l_class != SCHED_OTHER) { + /* On priority drop, only evict realtime LWPs. 
*/ + KASSERT(lwp_locked(l, spc->spc_lwplock)); + l->l_priority = pri; + spc_lock(ci); + sched_resched_cpu(ci, spc->spc_maxpriority, true); + /* spc now unlocked */ } else { l->l_priority = pri; } - resched_cpu(l); } static void sched_lendpri(struct lwp *l, pri_t pri) { + struct schedstate_percpu *spc; + struct cpu_info *ci; KASSERT(lwp_locked(l, NULL)); + ci = l->l_cpu; + spc = &ci->ci_schedstate; + if (l->l_stat == LSRUN) { - KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex)); + KASSERT(lwp_locked(l, spc->spc_mutex)); sched_dequeue(l); l->l_inheritedprio = pri; l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio); - sched_enqueue(l, false); + sched_enqueue(l); + sched_resched_lwp(l, false); + } else if (l->l_stat == LSONPROC && l->l_class != SCHED_OTHER) { + /* On priority drop, only evict realtime LWPs. */ + KASSERT(lwp_locked(l, spc->spc_lwplock)); + l->l_inheritedprio = pri; + l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio); + spc_lock(ci); + sched_resched_cpu(ci, spc->spc_maxpriority, true); + /* spc now unlocked */ } else { l->l_inheritedprio = pri; l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio); } - resched_cpu(l); } struct lwp * Index: src/sys/kern/sched_4bsd.c diff -u src/sys/kern/sched_4bsd.c:1.35 src/sys/kern/sched_4bsd.c:1.36 --- src/sys/kern/sched_4bsd.c:1.35 Mon Sep 3 16:29:35 2018 +++ src/sys/kern/sched_4bsd.c Sat Nov 23 19:42:52 2019 @@ -1,7 +1,8 @@ -/* $NetBSD: sched_4bsd.c,v 1.35 2018/09/03 16:29:35 riastradh Exp $ */ +/* $NetBSD: sched_4bsd.c,v 1.36 2019/11/23 19:42:52 ad Exp $ */ /* - * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2019 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -68,7 +69,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c,v 1.35 2018/09/03 16:29:35 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sched_4bsd.c,v 1.36 2019/11/23 19:42:52 ad Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -96,9 +97,6 @@ static int rrticks __read_mostly; /* * Force switch among equal priority processes every 100ms. * Called from hardclock every hz/10 == rrticks hardclock ticks. - * - * There's no need to lock anywhere in this routine, as it's - * CPU-local and runs at IPL_SCHED (called from clock interrupt). */ /* ARGSUSED */ void @@ -110,20 +108,27 @@ sched_tick(struct cpu_info *ci) spc->spc_ticks = rrticks; if (CURCPU_IDLE_P()) { - cpu_need_resched(ci, 0); + atomic_or_uint(&ci->ci_want_resched, + RESCHED_IDLE | RESCHED_UPREEMPT); return; } l = ci->ci_data.cpu_onproc; if (l == NULL) { return; } + /* + * Can only be spc_lwplock or a turnstile lock at this point + * (if we interrupted priority inheritance trylock dance). + */ + KASSERT(l->l_mutex != spc->spc_mutex); switch (l->l_class) { case SCHED_FIFO: /* No timeslicing for FIFO jobs. */ break; case SCHED_RR: /* Force it into mi_switch() to look for other jobs to run. */ - cpu_need_resched(ci, RESCHED_KPREEMPT); + atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE); + cpu_need_resched(ci, l, RESCHED_KPREEMPT); break; default: if (spc->spc_flags & SPCF_SHOULDYIELD) { @@ -132,7 +137,8 @@ sched_tick(struct cpu_info *ci) * due to buggy or inefficient code. Force a * kernel preemption. 
*/ - cpu_need_resched(ci, RESCHED_KPREEMPT); + atomic_or_uint(&l->l_dopreempt, DOPREEMPT_ACTIVE); + cpu_need_resched(ci, l, RESCHED_KPREEMPT); } else if (spc->spc_flags & SPCF_SEENRR) { /* * The process has already been through a roundrobin @@ -140,7 +146,7 @@ sched_tick(struct cpu_info *ci) * Indicate that the process should yield. */ spc->spc_flags |= SPCF_SHOULDYIELD; - cpu_need_resched(ci, 0); + cpu_need_resched(ci, l, RESCHED_UPREEMPT); } else { spc->spc_flags |= SPCF_SEENRR; } Index: src/sys/kern/sys_aio.c diff -u src/sys/kern/sys_aio.c:1.44 src/sys/kern/sys_aio.c:1.45 --- src/sys/kern/sys_aio.c:1.44 Sun Feb 10 17:13:33 2019 +++ src/sys/kern/sys_aio.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: sys_aio.c,v 1.44 2019/02/10 17:13:33 christos Exp $ */ +/* $NetBSD: sys_aio.c,v 1.45 2019/11/23 19:42:52 ad Exp $ */ /* * Copyright (c) 2007 Mindaugas Rasiukevicius <rmind at NetBSD org> @@ -32,7 +32,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.44 2019/02/10 17:13:33 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.45 2019/11/23 19:42:52 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ddb.h" @@ -229,10 +229,9 @@ aio_procinit(struct proc *p) /* Complete the initialization of thread, and run it */ aio->aio_worker = l; lwp_lock(l); - l->l_stat = LSRUN; - l->l_priority = MAXPRI_USER; - sched_enqueue(l, false); - lwp_unlock(l); + lwp_changepri(l, MAXPRI_USER); + setrunnable(l); + /* LWP now unlocked */ mutex_exit(p->p_lock); return 0; Index: src/sys/kern/sys_lwp.c diff -u src/sys/kern/sys_lwp.c:1.70 src/sys/kern/sys_lwp.c:1.71 --- src/sys/kern/sys_lwp.c:1.70 Mon Sep 30 21:13:33 2019 +++ src/sys/kern/sys_lwp.c Sat Nov 23 19:42:52 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: sys_lwp.c,v 1.70 2019/09/30 21:13:33 kamil Exp $ */ +/* $NetBSD: sys_lwp.c,v 1.71 2019/11/23 19:42:52 ad Exp $ */ /*- - * Copyright (c) 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2001, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.70 2019/09/30 21:13:33 kamil Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.71 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -102,7 +102,6 @@ do_lwp_create(lwp_t *l, void *arg, u_lon { struct proc *p = l->l_proc; struct lwp *l2; - struct schedstate_percpu *spc; vaddr_t uaddr; int error; @@ -120,35 +119,7 @@ do_lwp_create(lwp_t *l, void *arg, u_lon } *new_lwp = l2->l_lid; - - /* - * Set the new LWP running, unless the caller has requested that - * it be created in suspended state. If the process is stopping, - * then the LWP is created stopped. 
- */ - mutex_enter(p->p_lock); - lwp_lock(l2); - spc = &l2->l_cpu->ci_schedstate; - if ((flags & LWP_SUSPENDED) == 0 && - (l->l_flag & (LW_WREBOOT | LW_WSUSPEND | LW_WEXIT)) == 0) { - if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) { - KASSERT(l2->l_wchan == NULL); - l2->l_stat = LSSTOP; - p->p_nrlwps--; - lwp_unlock_to(l2, spc->spc_lwplock); - } else { - KASSERT(lwp_locked(l2, spc->spc_mutex)); - l2->l_stat = LSRUN; - sched_enqueue(l2, false); - lwp_unlock(l2); - } - } else { - l2->l_stat = LSSUSPENDED; - p->p_nrlwps--; - lwp_unlock_to(l2, spc->spc_lwplock); - } - mutex_exit(p->p_lock); - + lwp_start(l2, flags); return 0; } Index: src/sys/rump/librump/rumpkern/scheduler.c diff -u src/sys/rump/librump/rumpkern/scheduler.c:1.44 src/sys/rump/librump/rumpkern/scheduler.c:1.45 --- src/sys/rump/librump/rumpkern/scheduler.c:1.44 Fri Feb 19 18:38:37 2016 +++ src/sys/rump/librump/rumpkern/scheduler.c Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: scheduler.c,v 1.44 2016/02/19 18:38:37 pooka Exp $ */ +/* $NetBSD: scheduler.c,v 1.45 2019/11/23 19:42:52 ad Exp $ */ /* * Copyright (c) 2010, 2011 Antti Kantee. All Rights Reserved. @@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.44 2016/02/19 18:38:37 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.45 2019/11/23 19:42:52 ad Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -572,15 +572,32 @@ sched_nice(struct proc *p, int level) } void -sched_enqueue(struct lwp *l, bool swtch) +setrunnable(struct lwp *l) +{ + + sched_enqueue(l); +} + +void +sched_enqueue(struct lwp *l) { - if (swtch) - panic("sched_enqueue with switcheroo"); rump_thread_allow(l); } void +sched_resched_cpu(struct cpu_info *ci, pri_t pri, bool unlock) +{ + +} + +void +sched_resched_lwp(struct lwp *l, bool unlock) +{ + +} + +void sched_dequeue(struct lwp *l) { Index: src/sys/sys/cpu.h diff -u src/sys/sys/cpu.h:1.43 src/sys/sys/cpu.h:1.44 --- src/sys/sys/cpu.h:1.43 Thu Apr 19 21:19:07 2018 +++ src/sys/sys/cpu.h Sat Nov 23 19:42:52 2019 @@ -1,4 +1,4 @@ -/* $NetBSD: cpu.h,v 1.43 2018/04/19 21:19:07 christos Exp $ */ +/* $NetBSD: cpu.h,v 1.44 2019/11/23 19:42:52 ad Exp $ */ /*- * Copyright (c) 2007 YAMAMOTO Takashi, @@ -50,17 +50,8 @@ void cpu_idle(void); #endif #endif -/* - * cpu_need_resched() must always be called with the target CPU - * locked (via spc_lock() or another route), unless called locally. - * If called locally, the caller need only be at IPL_SCHED. - */ #ifndef cpu_need_resched -void cpu_need_resched(struct cpu_info *, int); -#endif - -#ifndef cpu_did_resched -#define cpu_did_resched(l) /* nothing */ +void cpu_need_resched(struct cpu_info *, struct lwp *, int); #endif /* @@ -140,9 +131,13 @@ int cpu_ucode_md_open(firmware_handle_t #endif #endif /* !_LOCORE */ -/* flags for cpu_need_resched */ -#define RESCHED_LAZY 0x01 /* request a ctx switch */ -#define RESCHED_IMMED 0x02 /* request an immediate ctx switch */ -#define RESCHED_KPREEMPT 0x04 /* request in-kernel preemption */ +/* + * Flags for cpu_need_resched. RESCHED_KERNEL must be greater than + * RESCHED_USER; see sched_resched_cpu(). 
+ */ +#define RESCHED_REMOTE 0x01 /* request is for a remote CPU */ +#define RESCHED_IDLE 0x02 /* idle LWP observed */ +#define RESCHED_UPREEMPT 0x04 /* immediate user ctx switch */ +#define RESCHED_KPREEMPT 0x08 /* immediate kernel ctx switch */ #endif /* !_SYS_CPU_H_ */ Index: src/sys/sys/lwp.h diff -u src/sys/sys/lwp.h:1.189 src/sys/sys/lwp.h:1.190 --- src/sys/sys/lwp.h:1.189 Thu Nov 21 19:47:21 2019 +++ src/sys/sys/lwp.h Sat Nov 23 19:42:52 2019 @@ -1,7 +1,7 @@ -/* $NetBSD: lwp.h,v 1.189 2019/11/21 19:47:21 ad Exp $ */ +/* $NetBSD: lwp.h,v 1.190 2019/11/23 19:42:52 ad Exp $ */ /* - * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010 + * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -344,6 +344,7 @@ void lwp_exit(lwp_t *); void lwp_exit_switchaway(lwp_t *) __dead; int lwp_suspend(lwp_t *, lwp_t *); int lwp_create1(lwp_t *, const void *, size_t, u_long, lwpid_t *); +void lwp_start(lwp_t *, int); void lwp_update_creds(lwp_t *); void lwp_migrate(lwp_t *, struct cpu_info *); lwp_t * lwp_find2(pid_t, lwpid_t); Index: src/sys/sys/sched.h diff -u src/sys/sys/sched.h:1.76 src/sys/sys/sched.h:1.77 --- src/sys/sys/sched.h:1.76 Sun Jul 3 14:24:59 2016 +++ src/sys/sys/sched.h Sat Nov 23 19:42:52 2019 @@ -1,7 +1,8 @@ -/* $NetBSD: sched.h,v 1.76 2016/07/03 14:24:59 christos Exp $ */ +/* $NetBSD: sched.h,v 1.77 2019/11/23 19:42:52 ad Exp $ */ /*- - * Copyright (c) 1999, 2000, 2001, 2002, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2007, 2008, 2019 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -228,7 +229,9 @@ void sched_pstats_hook(struct lwp *, in /* Runqueue-related functions */ bool sched_curcpu_runnable_p(void); void sched_dequeue(struct lwp *); -void sched_enqueue(struct lwp *, bool); +void sched_enqueue(struct lwp *); +void sched_resched_cpu(struct cpu_info *, pri_t, bool); +void sched_resched_lwp(struct lwp *, bool); struct lwp * sched_nextlwp(void); void sched_oncpu(struct lwp *); void sched_newts(struct lwp *);