Module Name:	src
Committed By:	ad
Date:		Sat Sep 23 18:48:05 UTC 2023
Modified Files:
	src/sys/kern: kern_condvar.c kern_lwp.c kern_mutex.c kern_rwlock.c
	    kern_sleepq.c kern_synch.c kern_timeout.c kern_turnstile.c
	    sys_lwp.c sys_select.c
	src/sys/rump/librump/rumpkern: sleepq.c
	src/sys/sys: lwp.h sleepq.h syncobj.h userret.h

Log Message:
- Simplify how priority boost for blocking in kernel is handled.  Rather
  than setting it up at each site where we block, make it a property of
  syncobj_t.  Then, do not hang onto the priority boost until userret(),
  drop it as soon as the LWP is out of the run queue and onto a CPU.
  Holding onto it longer is of questionable benefit.

- This allows two members of lwp_t to be deleted, and mi_userret() to be
  simplified a lot (next step: trim it down to a single conditional).

- While here, constify syncobj_t and de-inline a bunch of small functions
  like lwp_lock() which turn out not to be small after all (I don't know
  why, but atomic_*_relaxed() seem to provoke a compiler shitfit above and
  beyond what volatile does).


To generate a diff of this commit:
cvs rdiff -u -r1.55 -r1.56 src/sys/kern/kern_condvar.c
cvs rdiff -u -r1.255 -r1.256 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.109 -r1.110 src/sys/kern/kern_mutex.c
cvs rdiff -u -r1.72 -r1.73 src/sys/kern/kern_rwlock.c
cvs rdiff -u -r1.74 -r1.75 src/sys/kern/kern_sleepq.c
cvs rdiff -u -r1.358 -r1.359 src/sys/kern/kern_synch.c
cvs rdiff -u -r1.76 -r1.77 src/sys/kern/kern_timeout.c
cvs rdiff -u -r1.49 -r1.50 src/sys/kern/kern_turnstile.c
cvs rdiff -u -r1.84 -r1.85 src/sys/kern/sys_lwp.c
cvs rdiff -u -r1.61 -r1.62 src/sys/kern/sys_select.c
cvs rdiff -u -r1.23 -r1.24 src/sys/rump/librump/rumpkern/sleepq.c
cvs rdiff -u -r1.220 -r1.221 src/sys/sys/lwp.h
cvs rdiff -u -r1.36 -r1.37 src/sys/sys/sleepq.h
cvs rdiff -u -r1.15 -r1.16 src/sys/sys/syncobj.h
cvs rdiff -u -r1.33 -r1.34 src/sys/sys/userret.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
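In outline, the new arrangement looks like the sketch below.  It condenses
the definitions from sys/sys/syncobj.h and kern/kern_lwp.c in the diffs that
follow and is not a verbatim excerpt; the surrounding kernel types (lwp_t,
pri_t, wchan_t) are assumed from the usual headers.

/*
 * Condensed sketch of the new scheme.  Each syncobj_t now carries the
 * priority boost to apply while an LWP is blocked on it.
 */
typedef struct syncobj {
	char	sobj_name[16];
	u_int	sobj_flag;
	int	sobj_boostpri;	/* e.g. PRI_KERNEL, PRI_KTHREAD, PRI_USER */
	void	(*sobj_unsleep)(struct lwp *, bool);
	void	(*sobj_changepri)(struct lwp *, pri_t);
	void	(*sobj_lendpri)(struct lwp *, pri_t);
	struct lwp *(*sobj_owner)(wchan_t);
} const syncobj_t;

/*
 * Effective priority now takes the boost from the syncobj the LWP is
 * blocked on, rather than from per-call-site l_kpriority updates.
 */
pri_t
lwp_eprio(lwp_t *l)
{
	pri_t boostpri = l->l_syncobj->sobj_boostpri;
	pri_t pri = l->l_priority;

	if (pri <= MAXPRI_USER && boostpri > PRI_USER)
		pri = (pri >> 1) + boostpri;
	return MAX(l->l_auxprio, pri);
}

Call sites that previously set l->l_kpriority by hand (cv_enter(), tsleep(),
mtsleep(), kpause(), turnstile_block(), callout_wait(), select/poll) now get
the boost implicitly from the syncobj they pass to sleepq_enqueue().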
Modified files: Index: src/sys/kern/kern_condvar.c diff -u src/sys/kern/kern_condvar.c:1.55 src/sys/kern/kern_condvar.c:1.56 --- src/sys/kern/kern_condvar.c:1.55 Mon Jul 17 12:54:29 2023 +++ src/sys/kern/kern_condvar.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,8 @@ -/* $NetBSD: kern_condvar.c,v 1.55 2023/07/17 12:54:29 riastradh Exp $ */ +/* $NetBSD: kern_condvar.c,v 1.56 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2006, 2007, 2008, 2019, 2020, 2023 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -34,7 +35,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.55 2023/07/17 12:54:29 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_condvar.c,v 1.56 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -72,6 +73,7 @@ static inline void cv_wakeup_all(kcondva syncobj_t cv_syncobj = { .sobj_name = "cv", .sobj_flag = SOBJ_SLEEPQ_SORTED, + .sobj_boostpri = PRI_KERNEL, .sobj_unsleep = cv_unsleep, .sobj_changepri = sleepq_changepri, .sobj_lendpri = sleepq_lendpri, @@ -127,7 +129,6 @@ cv_enter(kcondvar_t *cv, kmutex_t *mtx, KASSERT(!cpu_intr_p()); KASSERT((l->l_pflag & LP_INTR) == 0 || panicstr != NULL); - l->l_kpriority = true; mp = sleepq_hashlock(cv); sq = CV_SLEEPQ(cv); sleepq_enter(sq, l, mp); Index: src/sys/kern/kern_lwp.c diff -u src/sys/kern/kern_lwp.c:1.255 src/sys/kern/kern_lwp.c:1.256 --- src/sys/kern/kern_lwp.c:1.255 Sat Sep 23 18:21:11 2023 +++ src/sys/kern/kern_lwp.c Sat Sep 23 18:48:04 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.255 2023/09/23 18:21:11 ad Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.256 2023/09/23 18:48:04 ad Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020, 2023 @@ -217,7 +217,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.255 2023/09/23 18:21:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.256 2023/09/23 18:48:04 ad Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -851,13 +851,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_ return EAGAIN; } - /* - * If vfork(), we want the LWP to run fast and on the same CPU - * as its parent, so that it can reuse the VM context and cache - * footprint on the local CPU. - */ - l2->l_kpriority = ((flags & LWP_VFORK) ? true : false); - l2->l_kpribase = PRI_KERNEL; l2->l_priority = l1->l_priority; l2->l_inheritedprio = -1; l2->l_protectprio = -1; @@ -1619,34 +1612,124 @@ lwp_unsleep(lwp_t *l, bool unlock) } /* + * Lock an LWP. + */ +void +lwp_lock(lwp_t *l) +{ + kmutex_t *old = atomic_load_consume(&l->l_mutex); + + /* + * Note: mutex_spin_enter() will have posted a read barrier. + * Re-test l->l_mutex. If it has changed, we need to try again. + */ + mutex_spin_enter(old); + while (__predict_false(atomic_load_relaxed(&l->l_mutex) != old)) { + mutex_spin_exit(old); + old = atomic_load_consume(&l->l_mutex); + mutex_spin_enter(old); + } +} + +/* + * Unlock an LWP. 
+ */ +void +lwp_unlock(lwp_t *l) +{ + + mutex_spin_exit(l->l_mutex); +} + +void +lwp_changepri(lwp_t *l, pri_t pri) +{ + + KASSERT(mutex_owned(l->l_mutex)); + + if (l->l_priority == pri) + return; + + (*l->l_syncobj->sobj_changepri)(l, pri); + KASSERT(l->l_priority == pri); +} + +void +lwp_lendpri(lwp_t *l, pri_t pri) +{ + KASSERT(mutex_owned(l->l_mutex)); + + (*l->l_syncobj->sobj_lendpri)(l, pri); + KASSERT(l->l_inheritedprio == pri); +} + +pri_t +lwp_eprio(lwp_t *l) +{ + pri_t boostpri = l->l_syncobj->sobj_boostpri; + pri_t pri = l->l_priority; + + KASSERT(mutex_owned(l->l_mutex)); + + /* + * Timeshared/user LWPs get a temporary priority boost for blocking + * in kernel. This is key to good interactive response on a loaded + * system: without it, things will seem very sluggish to the user. + * + * The function of the boost is to get the LWP onto a CPU and + * running quickly. Once that happens the LWP loses the priority + * boost and could be preempted very quickly by another LWP but that + * won't happen often enough to be a annoyance. + */ + if (pri <= MAXPRI_USER && boostpri > PRI_USER) + pri = (pri >> 1) + boostpri; + + return MAX(l->l_auxprio, pri); +} + +/* * Handle exceptions for mi_userret(). Called if a member of LW_USERRET is - * set. + * set or a preemption is required. */ void lwp_userret(struct lwp *l) { struct proc *p; - int sig; + int sig, f; KASSERT(l == curlwp); KASSERT(l->l_stat == LSONPROC); p = l->l_proc; - /* - * It is safe to do this read unlocked on a MP system.. - */ - while ((l->l_flag & LW_USERRET) != 0) { + for (;;) { + /* + * This is the main location that user preemptions are + * processed. + */ + preempt_point(); + + /* + * It is safe to do this unlocked and without raised SPL, + * since whenever a flag of interest is added to l_flag the + * LWP will take an AST and come down this path again. If a + * remote CPU posts the AST, it will be done with an IPI + * (strongly synchronising). + */ + if ((f = atomic_load_relaxed(&l->l_flag) & LW_USERRET) == 0) { + return; + } + /* * Process pending signals first, unless the process * is dumping core or exiting, where we will instead * enter the LW_WSUSPEND case below. */ - if ((l->l_flag & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == - LW_PENDSIG) { + if ((f & (LW_PENDSIG | LW_WCORE | LW_WEXIT)) == LW_PENDSIG) { mutex_enter(p->p_lock); while ((sig = issignal(l)) != 0) postsig(sig); mutex_exit(p->p_lock); + continue; } /* @@ -1660,7 +1743,7 @@ lwp_userret(struct lwp *l) * p->p_lwpcv so that sigexit() will write the core file out * once all other LWPs are suspended. */ - if ((l->l_flag & LW_WSUSPEND) != 0) { + if ((f & LW_WSUSPEND) != 0) { pcu_save_all(l); mutex_enter(p->p_lock); p->p_nrlwps--; @@ -1672,23 +1755,30 @@ lwp_userret(struct lwp *l) lwp_lock(l); spc_lock(l->l_cpu); mi_switch(l); + continue; } - /* Process is exiting. */ - if ((l->l_flag & LW_WEXIT) != 0) { + /* + * Process is exiting. The core dump and signal cases must + * be handled first. + */ + if ((f & LW_WEXIT) != 0) { lwp_exit(l); KASSERT(0); /* NOTREACHED */ } - /* update lwpctl processor (for vfork child_return) */ - if (l->l_flag & LW_LWPCTL) { + /* + * Update lwpctl processor (for vfork child_return). 
+ */ + if ((f & LW_LWPCTL) != 0) { lwp_lock(l); KASSERT(kpreempt_disabled()); l->l_lwpctl->lc_curcpu = (int)cpu_index(l->l_cpu); l->l_lwpctl->lc_pctr++; l->l_flag &= ~LW_LWPCTL; lwp_unlock(l); + continue; } } } Index: src/sys/kern/kern_mutex.c diff -u src/sys/kern/kern_mutex.c:1.109 src/sys/kern/kern_mutex.c:1.110 --- src/sys/kern/kern_mutex.c:1.109 Thu Sep 7 20:05:42 2023 +++ src/sys/kern/kern_mutex.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,8 @@ -/* $NetBSD: kern_mutex.c,v 1.109 2023/09/07 20:05:42 ad Exp $ */ +/* $NetBSD: kern_mutex.c,v 1.110 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 2002, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc. + * Copyright (c) 2002, 2006, 2007, 2008, 2019, 2023 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -40,7 +41,7 @@ #define __MUTEX_PRIVATE #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.109 2023/09/07 20:05:42 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.110 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/atomic.h> @@ -298,6 +299,7 @@ lockops_t mutex_adaptive_lockops = { syncobj_t mutex_syncobj = { .sobj_name = "mutex", .sobj_flag = SOBJ_SLEEPQ_SORTED, + .sobj_boostpri = PRI_KERNEL, .sobj_unsleep = turnstile_unsleep, .sobj_changepri = turnstile_changepri, .sobj_lendpri = sleepq_lendpri, Index: src/sys/kern/kern_rwlock.c diff -u src/sys/kern/kern_rwlock.c:1.72 src/sys/kern/kern_rwlock.c:1.73 --- src/sys/kern/kern_rwlock.c:1.72 Thu Sep 7 20:05:42 2023 +++ src/sys/kern/kern_rwlock.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_rwlock.c,v 1.72 2023/09/07 20:05:42 ad Exp $ */ +/* $NetBSD: kern_rwlock.c,v 1.73 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020 + * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -45,7 +45,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.72 2023/09/07 20:05:42 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.73 2023/09/23 18:48:04 ad Exp $"); #include "opt_lockdebug.h" @@ -120,9 +120,14 @@ lockops_t rwlock_lockops = { .lo_dump = rw_dump, }; +/* + * Give rwlock holders an extra-high priority boost on-blocking due to + * direct handoff. XXX To be revisited. + */ syncobj_t rw_syncobj = { .sobj_name = "rw", .sobj_flag = SOBJ_SLEEPQ_SORTED, + .sobj_boostpri = PRI_KTHREAD, .sobj_unsleep = turnstile_unsleep, .sobj_changepri = turnstile_changepri, .sobj_lendpri = sleepq_lendpri, Index: src/sys/kern/kern_sleepq.c diff -u src/sys/kern/kern_sleepq.c:1.74 src/sys/kern/kern_sleepq.c:1.75 --- src/sys/kern/kern_sleepq.c:1.74 Sun Apr 9 09:18:09 2023 +++ src/sys/kern/kern_sleepq.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,8 @@ -/* $NetBSD: kern_sleepq.c,v 1.74 2023/04/09 09:18:09 riastradh Exp $ */ +/* $NetBSD: kern_sleepq.c,v 1.75 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 2006, 2007, 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2006, 2007, 2008, 2009, 2019, 2020, 2023 + * The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -35,7 +36,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.74 2023/04/09 09:18:09 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sleepq.c,v 1.75 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/kernel.h> @@ -211,6 +212,25 @@ sleepq_insert(sleepq_t *sq, lwp_t *l, sy } /* + * sleepq_enter: + * + * Prepare to block on a sleep queue, after which any interlock can be + * safely released. + */ +void +sleepq_enter(sleepq_t *sq, lwp_t *l, kmutex_t *mp) +{ + + /* + * Acquire the per-LWP mutex and lend it our sleep queue lock. + * Once interlocked, we can release the kernel lock. + */ + lwp_lock(l); + lwp_unlock_to(l, mp); + KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); +} + +/* * sleepq_enqueue: * * Enter an LWP into the sleep queue and prepare for sleep. The sleep @@ -303,7 +323,7 @@ sleepq_uncatch(lwp_t *l) * timo is a timeout in ticks. timo = 0 specifies an infinite timeout. */ int -sleepq_block(int timo, bool catch_p, struct syncobj *syncobj) +sleepq_block(int timo, bool catch_p, syncobj_t *syncobj) { int error = 0, sig; struct proc *p; Index: src/sys/kern/kern_synch.c diff -u src/sys/kern/kern_synch.c:1.358 src/sys/kern/kern_synch.c:1.359 --- src/sys/kern/kern_synch.c:1.358 Mon Jul 17 12:54:29 2023 +++ src/sys/kern/kern_synch.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_synch.c,v 1.358 2023/07/17 12:54:29 riastradh Exp $ */ +/* $NetBSD: kern_synch.c,v 1.359 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020 + * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020, 2023 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -69,7 +69,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.358 2023/07/17 12:54:29 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.359 2023/09/23 18:48:04 ad Exp $"); #include "opt_kstack.h" #include "opt_ddb.h" @@ -115,6 +115,7 @@ static void sched_lendpri(struct lwp *, syncobj_t sleep_syncobj = { .sobj_name = "sleep", .sobj_flag = SOBJ_SLEEPQ_SORTED, + .sobj_boostpri = PRI_KERNEL, .sobj_unsleep = sleepq_unsleep, .sobj_changepri = sleepq_changepri, .sobj_lendpri = sleepq_lendpri, @@ -124,6 +125,7 @@ syncobj_t sleep_syncobj = { syncobj_t sched_syncobj = { .sobj_name = "sched", .sobj_flag = SOBJ_SLEEPQ_SORTED, + .sobj_boostpri = PRI_USER, .sobj_unsleep = sched_unsleep, .sobj_changepri = sched_changepri, .sobj_lendpri = sched_lendpri, @@ -133,6 +135,7 @@ syncobj_t sched_syncobj = { syncobj_t kpause_syncobj = { .sobj_name = "kpause", .sobj_flag = SOBJ_SLEEPQ_NULL, + .sobj_boostpri = PRI_KERNEL, .sobj_unsleep = sleepq_unsleep, .sobj_changepri = sleepq_changepri, .sobj_lendpri = sleepq_lendpri, @@ -193,7 +196,6 @@ tsleep(wchan_t ident, pri_t priority, co return 0; } - l->l_kpriority = true; catch_p = priority & PCATCH; sq = sleeptab_lookup(&sleeptab, ident, &mp); sleepq_enter(sq, l, mp); @@ -219,7 +221,6 @@ mtsleep(wchan_t ident, pri_t priority, c return 0; } - l->l_kpriority = true; catch_p = priority & PCATCH; sq = sleeptab_lookup(&sleeptab, ident, &mp); sleepq_enter(sq, l, mp); @@ -249,7 +250,6 @@ kpause(const char *wmesg, bool intr, int if (mtx != NULL) mutex_exit(mtx); - l->l_kpriority = true; lwp_lock(l); KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); sleepq_enqueue(NULL, l, wmesg, &kpause_syncobj, intr); @@ -293,8 +293,6 @@ yield(void) KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock)); KASSERT(l->l_stat == 
LSONPROC); - /* Voluntary - ditch kpriority boost. */ - l->l_kpriority = false; spc_lock(l->l_cpu); mi_switch(l); KERNEL_LOCK(l->l_biglocks, l); @@ -307,7 +305,6 @@ yield(void) * * - It's counted differently (involuntary vs. voluntary). * - Realtime threads go to the head of their runqueue vs. tail for yield(). - * - Priority boost is retained unless LWP has exceeded timeslice. */ void preempt(void) @@ -321,10 +318,6 @@ preempt(void) KASSERT(l->l_stat == LSONPROC); spc_lock(l->l_cpu); - /* Involuntary - keep kpriority boost unless a CPU hog. */ - if ((l->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) { - l->l_kpriority = false; - } l->l_pflag |= LP_PREEMPTING; mi_switch(l); KERNEL_LOCK(l->l_biglocks, l); @@ -431,7 +424,6 @@ kpreempt(uintptr_t where) kpreempt_ev_immed.ev_count++; } lwp_lock(l); - /* Involuntary - keep kpriority boost. */ l->l_pflag |= LP_PREEMPTING; spc_lock(l->l_cpu); mi_switch(l); Index: src/sys/kern/kern_timeout.c diff -u src/sys/kern/kern_timeout.c:1.76 src/sys/kern/kern_timeout.c:1.77 --- src/sys/kern/kern_timeout.c:1.76 Tue Jun 27 01:19:44 2023 +++ src/sys/kern/kern_timeout.c Sat Sep 23 18:48:04 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_timeout.c,v 1.76 2023/06/27 01:19:44 pho Exp $ */ +/* $NetBSD: kern_timeout.c,v 1.77 2023/09/23 18:48:04 ad Exp $ */ /*- * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. @@ -59,7 +59,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.76 2023/06/27 01:19:44 pho Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.77 2023/09/23 18:48:04 ad Exp $"); /* * Timeouts are kept in a hierarchical timing wheel. The c_time is the @@ -608,7 +608,6 @@ callout_wait(callout_impl_t *c, void *in KASSERT(l->l_wchan == NULL); cc->cc_nwait++; cc->cc_ev_block.ev_count++; - l->l_kpriority = true; sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock); sleepq_enqueue(&cc->cc_sleepq, cc, "callout", &sleep_syncobj, false); Index: src/sys/kern/kern_turnstile.c diff -u src/sys/kern/kern_turnstile.c:1.49 src/sys/kern/kern_turnstile.c:1.50 --- src/sys/kern/kern_turnstile.c:1.49 Sat Sep 23 18:21:11 2023 +++ src/sys/kern/kern_turnstile.c Sat Sep 23 18:48:04 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_turnstile.c,v 1.49 2023/09/23 18:21:11 ad Exp $ */ +/* $NetBSD: kern_turnstile.c,v 1.50 2023/09/23 18:48:04 ad Exp $ */ /*- * Copyright (c) 2002, 2006, 2007, 2009, 2019, 2020, 2023 @@ -61,7 +61,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.49 2023/09/23 18:21:11 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.50 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/lockdebug.h> @@ -371,7 +371,6 @@ turnstile_block(turnstile_t *ts, int q, tschain_t *tc; kmutex_t *lock; sleepq_t *sq; - pri_t obase; u_int hash; hash = TS_HASH(obj); @@ -417,10 +416,6 @@ turnstile_block(turnstile_t *ts, int q, ts->ts_waiters[q]++; sleepq_enter(sq, l, lock); LOCKDEBUG_BARRIER(lock, 1); - l->l_kpriority = true; - obase = l->l_kpribase; - if (obase < PRI_KTHREAD) - l->l_kpribase = PRI_KTHREAD; sleepq_enqueue(sq, obj, "tstile", sobj, false); /* @@ -432,7 +427,6 @@ turnstile_block(turnstile_t *ts, int q, KASSERT(lock == l->l_mutex); turnstile_lendpri(l); sleepq_block(0, false, sobj); - l->l_kpribase = obase; KPREEMPT_ENABLE(l); } Index: src/sys/kern/sys_lwp.c diff -u src/sys/kern/sys_lwp.c:1.84 src/sys/kern/sys_lwp.c:1.85 --- src/sys/kern/sys_lwp.c:1.84 Mon Jul 17 12:54:29 2023 +++ src/sys/kern/sys_lwp.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,8 @@ -/* $NetBSD: sys_lwp.c,v 1.84 
2023/07/17 12:54:29 riastradh Exp $ */ +/* $NetBSD: sys_lwp.c,v 1.85 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020, 2023 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -35,7 +36,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.84 2023/07/17 12:54:29 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.85 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -57,9 +58,14 @@ __KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v static const stack_t lwp_ss_init = SS_INIT; +/* + * Parked LWPs get no priority boost on awakening as they blocked on + * user space objects. Maybe revisit? + */ syncobj_t lwp_park_syncobj = { .sobj_name = "lwp_park", .sobj_flag = SOBJ_SLEEPQ_NULL, + .sobj_boostpri = PRI_USER, .sobj_unsleep = sleepq_unsleep, .sobj_changepri = sleepq_changepri, .sobj_lendpri = sleepq_lendpri, Index: src/sys/kern/sys_select.c diff -u src/sys/kern/sys_select.c:1.61 src/sys/kern/sys_select.c:1.62 --- src/sys/kern/sys_select.c:1.61 Mon Jul 17 12:54:29 2023 +++ src/sys/kern/sys_select.c Sat Sep 23 18:48:04 2023 @@ -1,7 +1,8 @@ -/* $NetBSD: sys_select.c,v 1.61 2023/07/17 12:54:29 riastradh Exp $ */ +/* $NetBSD: sys_select.c,v 1.62 2023/09/23 18:48:04 ad Exp $ */ /*- - * Copyright (c) 2007, 2008, 2009, 2010, 2019, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2009, 2010, 2019, 2020, 2023 + * The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -84,7 +85,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.61 2023/07/17 12:54:29 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.62 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -145,6 +146,7 @@ static const int sel_flag[] = { syncobj_t select_sobj = { .sobj_name = "select", .sobj_flag = SOBJ_SLEEPQ_LIFO, + .sobj_boostpri = PRI_KERNEL, .sobj_unsleep = sleepq_unsleep, .sobj_changepri = sleepq_changepri, .sobj_lendpri = sleepq_lendpri, @@ -320,7 +322,6 @@ state_check: } /* Nothing happen, therefore - sleep. */ l->l_selflag = SEL_BLOCKING; - l->l_kpriority = true; sleepq_enter(&sc->sc_sleepq, l, lock); sleepq_enqueue(&sc->sc_sleepq, sc, opname, &select_sobj, true); error = sleepq_block(timo, true, &select_sobj); Index: src/sys/rump/librump/rumpkern/sleepq.c diff -u src/sys/rump/librump/rumpkern/sleepq.c:1.23 src/sys/rump/librump/rumpkern/sleepq.c:1.24 --- src/sys/rump/librump/rumpkern/sleepq.c:1.23 Thu Jun 30 07:47:07 2022 +++ src/sys/rump/librump/rumpkern/sleepq.c Sat Sep 23 18:48:04 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: sleepq.c,v 1.23 2022/06/30 07:47:07 knakahara Exp $ */ +/* $NetBSD: sleepq.c,v 1.24 2023/09/23 18:48:04 ad Exp $ */ /* * Copyright (c) 2008 Antti Kantee. All Rights Reserved. 
@@ -26,7 +26,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sleepq.c,v 1.23 2022/06/30 07:47:07 knakahara Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sleepq.c,v 1.24 2023/09/23 18:48:04 ad Exp $"); #include <sys/param.h> #include <sys/condvar.h> @@ -57,6 +57,15 @@ sleepq_destroy(sleepq_t *sq) } void +sleepq_enter(sleepq_t *sq, lwp_t *l, kmutex_t *mp) +{ + + lwp_lock(l); + lwp_unlock_to(l, mp); + KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); +} + +void sleepq_enqueue(sleepq_t *sq, wchan_t wc, const char *wmsg, syncobj_t *sob, bool catch_p) { @@ -69,7 +78,7 @@ sleepq_enqueue(sleepq_t *sq, wchan_t wc, } int -sleepq_block(int timo, bool catch, struct syncobj *syncobj __unused) +sleepq_block(int timo, bool catch, syncobj_t *syncobj __unused) { struct lwp *l = curlwp; int error = 0; @@ -166,3 +175,45 @@ lwp_unlock_to(struct lwp *l, kmutex_t *n atomic_store_release(&l->l_mutex, new); mutex_spin_exit(old); } + +void +lwp_lock(lwp_t *l) +{ + kmutex_t *old = atomic_load_consume(&l->l_mutex); + + mutex_spin_enter(old); + while (__predict_false(atomic_load_relaxed(&l->l_mutex) != old)) { + mutex_spin_exit(old); + old = atomic_load_consume(&l->l_mutex); + mutex_spin_enter(old); + } +} + +void +lwp_unlock(lwp_t *l) +{ + + mutex_spin_exit(l->l_mutex); +} + +void +lwp_changepri(lwp_t *l, pri_t pri) +{ + + /* fuck */ +} + +void +lwp_lendpri(lwp_t *l, pri_t pri) +{ + + /* you */ +} + +pri_t +lwp_eprio(lwp_t *l) +{ + + /* Antti */ + return l->l_priority; +} Index: src/sys/sys/lwp.h diff -u src/sys/sys/lwp.h:1.220 src/sys/sys/lwp.h:1.221 --- src/sys/sys/lwp.h:1.220 Sun Sep 10 14:31:24 2023 +++ src/sys/sys/lwp.h Sat Sep 23 18:48:05 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: lwp.h,v 1.220 2023/09/10 14:31:24 ad Exp $ */ +/* $NetBSD: lwp.h,v 1.221 2023/09/23 18:48:05 ad Exp $ */ /* - * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020 + * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020, 2023 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -81,6 +81,7 @@ static __inline struct cpu_info *lwp_get #include <sys/pcu.h> +typedef struct syncobj const syncobj_t; struct lockdebug; struct sysent; @@ -111,8 +112,6 @@ struct lwp { u_int l_slpticksum; /* l: Sum of ticks spent sleeping */ int l_biglocks; /* l: biglock count before sleep */ int l_class; /* l: scheduling class */ - int l_kpriority; /* !: has kernel priority boost */ - pri_t l_kpribase; /* !: kernel priority base level */ pri_t l_priority; /* l: scheduler priority */ pri_t l_inheritedprio;/* l: inherited priority */ pri_t l_protectprio; /* l: for PTHREAD_PRIO_PROTECT */ @@ -131,7 +130,7 @@ struct lwp { kcpuset_t *l_affinity; /* l: CPU set for affinity */ /* Synchronisation. */ - struct syncobj *l_syncobj; /* l: sync object operations set */ + syncobj_t *l_syncobj; /* l: sync object operations set */ LIST_ENTRY(lwp) l_sleepchain; /* l: sleep queue */ wchan_t l_wchan; /* l: sleep address */ const char *l_wmesg; /* l: reason for sleep */ @@ -351,10 +350,15 @@ void lwp0_init(void); void lwp_startup(lwp_t *, lwp_t *); void startlwp(void *); +void lwp_lock(lwp_t *); +void lwp_unlock(lwp_t *); +pri_t lwp_eprio(lwp_t *); int lwp_locked(lwp_t *, kmutex_t *); kmutex_t *lwp_setlock(lwp_t *, kmutex_t *); void lwp_unlock_to(lwp_t *, kmutex_t *); int lwp_trylock(lwp_t *); +void lwp_changepri(lwp_t *, pri_t); +void lwp_lendpri(lwp_t *, pri_t); void lwp_addref(lwp_t *); void lwp_delref(lwp_t *); void lwp_delref2(lwp_t *); @@ -403,67 +407,6 @@ int lwp_unpark(const lwpid_t *, const u_ /* DDB. */ void lwp_whatis(uintptr_t, void (*)(const char *, ...) 
__printflike(1, 2)); -/* - * Lock an LWP. XXX _MODULE - */ -static __inline void -lwp_lock(lwp_t *l) -{ - kmutex_t *old = atomic_load_consume(&l->l_mutex); - - /* - * Note: mutex_spin_enter() will have posted a read barrier. - * Re-test l->l_mutex. If it has changed, we need to try again. - */ - mutex_spin_enter(old); - while (__predict_false(atomic_load_relaxed(&l->l_mutex) != old)) { - mutex_spin_exit(old); - old = atomic_load_consume(&l->l_mutex); - mutex_spin_enter(old); - } -} - -/* - * Unlock an LWP. XXX _MODULE - */ -static __inline void -lwp_unlock(lwp_t *l) -{ - mutex_spin_exit(l->l_mutex); -} - -static __inline void -lwp_changepri(lwp_t *l, pri_t pri) -{ - KASSERT(mutex_owned(l->l_mutex)); - - if (l->l_priority == pri) - return; - - (*l->l_syncobj->sobj_changepri)(l, pri); - KASSERT(l->l_priority == pri); -} - -static __inline void -lwp_lendpri(lwp_t *l, pri_t pri) -{ - KASSERT(mutex_owned(l->l_mutex)); - - (*l->l_syncobj->sobj_lendpri)(l, pri); - KASSERT(l->l_inheritedprio == pri); -} - -static __inline pri_t -lwp_eprio(lwp_t *l) -{ - pri_t pri; - - pri = l->l_priority; - if ((l->l_flag & LW_SYSTEM) == 0 && l->l_kpriority && pri < PRI_KERNEL) - pri = (pri >> 1) + l->l_kpribase; - return MAX(l->l_auxprio, pri); -} - int lwp_create(lwp_t *, struct proc *, vaddr_t, int, void *, size_t, void (*)(void *), void *, lwp_t **, int, const sigset_t *, const stack_t *); Index: src/sys/sys/sleepq.h diff -u src/sys/sys/sleepq.h:1.36 src/sys/sys/sleepq.h:1.37 --- src/sys/sys/sleepq.h:1.36 Wed Oct 26 23:24:59 2022 +++ src/sys/sys/sleepq.h Sat Sep 23 18:48:05 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: sleepq.h,v 1.36 2022/10/26 23:24:59 riastradh Exp $ */ +/* $NetBSD: sleepq.h,v 1.37 2023/09/23 18:48:05 ad Exp $ */ /*- - * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020 + * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -46,13 +46,14 @@ */ typedef struct sleepq sleepq_t; +typedef struct syncobj const syncobj_t; void sleepq_init(sleepq_t *); void sleepq_remove(sleepq_t *, lwp_t *); -void sleepq_enqueue(sleepq_t *, wchan_t, const char *, struct syncobj *, - bool); +void sleepq_enter(sleepq_t *, lwp_t *, kmutex_t *); +void sleepq_enqueue(sleepq_t *, wchan_t, const char *, syncobj_t *, bool); void sleepq_transfer(lwp_t *, sleepq_t *, sleepq_t *, wchan_t, const char *, - struct syncobj *, kmutex_t *, bool); + syncobj_t *, kmutex_t *, bool); void sleepq_uncatch(lwp_t *); void sleepq_unsleep(lwp_t *, bool); void sleepq_timeout(void *); @@ -60,7 +61,7 @@ void sleepq_wake(sleepq_t *, wchan_t, u_ int sleepq_abort(kmutex_t *, int); void sleepq_changepri(lwp_t *, pri_t); void sleepq_lendpri(lwp_t *, pri_t); -int sleepq_block(int, bool, struct syncobj *); +int sleepq_block(int, bool, syncobj_t *); #ifdef _KERNEL @@ -83,24 +84,7 @@ sleepq_dontsleep(lwp_t *l) return cold || (doing_shutdown && (panicstr || CURCPU_IDLE_P())); } -/* - * Prepare to block on a sleep queue, after which any interlock can be - * safely released. - */ -static __inline void -sleepq_enter(sleepq_t *sq, lwp_t *l, kmutex_t *mp) -{ - - /* - * Acquire the per-LWP mutex and lend it ours sleep queue lock. - * Once interlocked, we can release the kernel lock. 
- */ - lwp_lock(l); - lwp_unlock_to(l, mp); - KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks); -} - -#endif +#endif /* _KERNEL */ #include <sys/sleeptab.h> Index: src/sys/sys/syncobj.h diff -u src/sys/sys/syncobj.h:1.15 src/sys/sys/syncobj.h:1.16 --- src/sys/sys/syncobj.h:1.15 Mon Jul 17 12:54:53 2023 +++ src/sys/sys/syncobj.h Sat Sep 23 18:48:05 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: syncobj.h,v 1.15 2023/07/17 12:54:53 riastradh Exp $ */ +/* $NetBSD: syncobj.h,v 1.16 2023/09/23 18:48:05 ad Exp $ */ /*- - * Copyright (c) 2007, 2008, 2020 The NetBSD Foundation, Inc. + * Copyright (c) 2007, 2008, 2020, 2023 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -44,11 +44,12 @@ typedef volatile const void *wchan_t; typedef struct syncobj { char sobj_name[16]; u_int sobj_flag; + int sobj_boostpri; void (*sobj_unsleep)(struct lwp *, bool); void (*sobj_changepri)(struct lwp *, pri_t); void (*sobj_lendpri)(struct lwp *, pri_t); struct lwp *(*sobj_owner)(wchan_t); -} syncobj_t; +} const syncobj_t; struct lwp *syncobj_noowner(wchan_t); Index: src/sys/sys/userret.h diff -u src/sys/sys/userret.h:1.33 src/sys/sys/userret.h:1.34 --- src/sys/sys/userret.h:1.33 Thu Mar 26 20:19:06 2020 +++ src/sys/sys/userret.h Sat Sep 23 18:48:05 2023 @@ -1,7 +1,7 @@ -/* $NetBSD: userret.h,v 1.33 2020/03/26 20:19:06 ad Exp $ */ +/* $NetBSD: userret.h,v 1.34 2023/09/23 18:48:05 ad Exp $ */ /*- - * Copyright (c) 1998, 2000, 2003, 2006, 2008, 2019, 2020 + * Copyright (c) 1998, 2000, 2003, 2006, 2008, 2019, 2020, 2023 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -30,39 +30,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/*- - * Copyright (c) 1990 The Regents of the University of California. - * All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * William Jolitz. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - */ - #ifndef _SYS_USERRET_H_ #define _SYS_USERRET_H_ @@ -81,37 +48,16 @@ static __inline void mi_userret(struct lwp *l) { - struct cpu_info *ci; + int exception; KPREEMPT_DISABLE(l); - ci = l->l_cpu; - KASSERTMSG(ci->ci_biglock_count == 0, "kernel_lock leaked"); + KASSERTMSG(l->l_cpu->ci_biglock_count == 0, "kernel_lock leaked"); KASSERT(l->l_blcnt == 0); - if (__predict_false(ci->ci_want_resched)) { - preempt(); - ci = l->l_cpu; - } - if (__predict_false(l->l_flag & LW_USERRET)) { - KPREEMPT_ENABLE(l); + exception = l->l_cpu->ci_want_resched | (l->l_flag & LW_USERRET); + KPREEMPT_ENABLE(l); + if (__predict_false(exception)) { lwp_userret(l); - KPREEMPT_DISABLE(l); - ci = l->l_cpu; } - /* - * lwp_eprio() is too involved to use here unlocked. At this point - * it only matters for PTHREAD_PRIO_PROTECT; setting a too low value - * is OK because the scheduler will find out the true value if we - * end up in mi_switch(). - * - * This is being called on every syscall and trap, and remote CPUs - * regularly look at ci_schedstate. Keep the cache line in the - * SHARED state by only updating spc_curpriority if it has changed. - */ - l->l_kpriority = false; - if (ci->ci_schedstate.spc_curpriority != l->l_priority) { - ci->ci_schedstate.spc_curpriority = l->l_priority; - } - KPREEMPT_ENABLE(l); LOCKDEBUG_BARRIER(NULL, 0); KASSERT(l->l_nopreempt == 0);