On Sun, Jul 31, 2016 at 01:49:28PM +0300, Konstantin Belousov wrote:
[snip]
After an IRC discussion, the following patch was produced (also available at:
https://people.freebsd.org/~mjg/lock_backoff_complete4.diff).
Differences from the previous version:
- uint64_t usage was converted to u_int (also see r303584)
- the currently unused features (cap limit and return value) were removed
- the lock_delay() arguments are now packed into a dedicated structure
Note that this patch requires the tree to be at least at r303584.
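
For readers who want the API shape without reading the whole diff, here is a
minimal usage sketch. It is illustrative only: "foo", struct foo and
foo_try_lock() are hypothetical placeholders, the config values are just the
static defaults from the patch, and the real hard paths below additionally
restrict the backoff to the case where the lock owner is running on another
CPU.

#include <sys/param.h>
#include <sys/lock.h>

struct foo;				/* hypothetical lock type */
int foo_try_lock(struct foo *);		/* hypothetical acquire attempt */

/* Per-lock-class backoff parameters; normally rescaled by a SYSINIT. */
static struct lock_delay_config foo_delay = {
	.initial = 1000,
	.step = 500,
	.min = 100,
	.max = 5000,
};

static void
foo_lock_hard(struct foo *f)
{
	struct lock_delay_arg lda;

	lock_delay_arg_init(&lda, &foo_delay);
	for (;;) {
		if (foo_try_lock(f))
			break;
		/*
		 * Spin cpu_spinwait() for cpu_ticks() % delay iterations
		 * (but at least "min"); "delay" starts at "initial" and
		 * grows by "step" on every call, capped at "max".
		 * lda.spin_cnt accumulates the iterations for the
		 * KDTRACE_HOOKS statistics.
		 */
		lock_delay(&lda);
	}
}
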
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 0555a78..9b07b8b 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
@@ -138,6 +139,36 @@ struct lock_class lock_class_mtx_spin = {
#endif
};
+#ifdef ADAPTIVE_MUTEXES
+static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
+
+static struct lock_delay_config mtx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
+ 0, "");
+
+static void
+mtx_delay_sysinit(void *dummy)
+{
+
+ mtx_delay.initial = mp_ncpus * 25;
+ mtx_delay.min = mp_ncpus * 5;
+ mtx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(mtx_delay_sysinit);
+#endif
+
/*
* System-wide mutexes
*/
@@ -408,8 +439,10 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
int contested = 0;
uint64_t waittime = 0;
#endif
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -418,6 +451,9 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &mtx_delay);
+#endif
m = mtxlock2mtx(c);
if (mtx_owned(m)) {
@@ -451,7 +487,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
/*
@@ -471,12 +507,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
"spinning", "lockname:\"%s\"",
m->lock_object.lo_name);
while (mtx_owner(m) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname((struct thread *)tid),
"running");
@@ -570,7 +602,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
/*
* Only record the loops spinning and not sleeping.
*/
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
#endif
}
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index d4cae61..363b042 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
@@ -65,15 +66,6 @@ PMC_SOFT_DECLARE( , , lock, failed);
*/
#define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock))
-#ifdef ADAPTIVE_RWLOCKS
-static int rowner_retries = 10;
-static int rowner_loops = 10000;
-static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
- "rwlock debugging");
-SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
-SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
-#endif
-
#ifdef DDB
#include <ddb/ddb.h>
@@ -100,6 +92,41 @@ struct lock_class lock_class_rw = {
#endif
};
+#ifdef ADAPTIVE_RWLOCKS
+static int rowner_retries = 10;
+static int rowner_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
+ "rwlock debugging");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
+
+static struct lock_delay_config rw_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
+ 0, "");
+
+static void
+rw_delay_sysinit(void *dummy)
+{
+
+ rw_delay.initial = mp_ncpus * 25;
+ rw_delay.min = mp_ncpus * 5;
+ rw_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(rw_delay_sysinit);
+#endif
+
/*
* Return a pointer to the owning thread if the lock is write-locked or
* NULL if the lock is unlocked or read-locked.
@@ -355,9 +382,11 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
int contested = 0;
#endif
uintptr_t v;
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -366,6 +395,9 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#endif
rw = rwlock2rw(c);
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
@@ -412,7 +444,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
continue;
}
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -437,12 +469,8 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
sched_tdname(curthread), "spinning",
"lockname:\"%s\"", rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) ==
- owner && TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ owner && TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -459,7 +487,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
cpu_spinwait();
}
#ifdef KDTRACE_HOOKS
- spin_cnt += rowner_loops - i;
+ lda.spin_cnt += rowner_loops - i;
#endif
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
@@ -552,7 +580,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
@@ -740,9 +768,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
uint64_t waittime = 0;
int contested = 0;
#endif
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -751,6 +781,9 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#endif
rw = rwlock2rw(c);
if (rw_wlocked(rw)) {
@@ -775,7 +808,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -798,12 +831,8 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
"spinning", "lockname:\"%s\"",
rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
continue;
@@ -828,7 +857,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
#ifdef KDTRACE_HOOKS
- spin_cnt += rowner_loops - i;
+ lda.spin_cnt += rowner_loops - i;
#endif
if (i != rowner_loops)
continue;
@@ -918,7 +947,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 78d207d..42878110 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
@@ -145,6 +146,32 @@ static u_int asx_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
+
+static struct lock_delay_config sx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
+ 0, "");
+
+static void
+sx_delay_sysinit(void *dummy)
+{
+
+ sx_delay.initial = mp_ncpus * 25;
+ sx_delay.min = mp_ncpus * 5;
+ sx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(sx_delay_sysinit);
#endif
void
@@ -513,9 +540,11 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
int contested = 0;
#endif
int error = 0;
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -524,6 +553,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &sx_delay);
+#endif
+
/* If we already hold an exclusive lock, then recurse. */
if (sx_xlocked(sx)) {
KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -549,7 +582,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -578,12 +611,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -605,7 +634,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
break;
cpu_spinwait();
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
}
KTR_STATE0(KTR_SCHED, "thread",
@@ -725,7 +754,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
@@ -818,9 +847,11 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
#endif
uintptr_t x;
int error = 0;
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -829,6 +860,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_SX) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &sx_delay);
+#endif
#ifdef KDTRACE_HOOKS
state = sx->sx_lock;
all_time -= lockstat_nsecs(&sx->lock_object);
@@ -840,7 +874,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
*/
for (;;) {
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
x = sx->sx_lock;
@@ -888,12 +922,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
"lockname:\"%s\"", sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -989,7 +1019,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c
index e78d5a9..bfe189d 100644
--- a/sys/kern/subr_lock.c
+++ b/sys/kern/subr_lock.c
@@ -103,6 +103,34 @@ lock_destroy(struct lock_object *lock)
lock->lo_flags &= ~LO_INITIALIZED;
}
+void
+lock_delay(struct lock_delay_arg *la)
+{
+ u_int i, delay, backoff, min, max;
+ struct lock_delay_config *lc = la->config;
+
+ delay = la->delay;
+
+ if (delay == 0)
+ delay = lc->initial;
+ else {
+ delay += lc->step;
+ max = lc->max;
+ if (delay > max)
+ delay = max;
+ }
+
+ backoff = cpu_ticks() % delay;
+ min = lc->min;
+ if (backoff < min)
+ backoff = min;
+ for (i = 0; i < backoff; i++)
+ cpu_spinwait();
+
+ la->delay = delay;
+ la->spin_cnt += backoff;
+}
+
#ifdef DDB
DB_SHOW_COMMAND(lock, db_show_lock)
{
diff --git a/sys/sys/lock.h b/sys/sys/lock.h
index 8d7a068..bd66aad 100644
--- a/sys/sys/lock.h
+++ b/sys/sys/lock.h
@@ -201,9 +201,35 @@ extern struct lock_class lock_class_lockmgr;
extern struct lock_class *lock_classes[];
+extern int lock_delay_enabled;
+
+struct lock_delay_config {
+ u_int initial;
+ u_int step;
+ u_int min;
+ u_int max;
+};
+
+struct lock_delay_arg {
+ struct lock_delay_config *config;
+ u_int delay;
+ u_int spin_cnt;
+};
+
+static inline void
+lock_delay_arg_init(struct lock_delay_arg *la, struct lock_delay_config *lc) {
+ la->config = lc;
+ la->delay = 0;
+ la->spin_cnt = 0;
+}
+
+#define LOCK_DELAY_SYSINIT(func) \
+ SYSINIT(func##_ld, SI_SUB_LOCK, SI_ORDER_ANY, func, NULL)
+
void lock_init(struct lock_object *, struct lock_class *,
const char *, const char *, int);
void lock_destroy(struct lock_object *);
+void lock_delay(struct lock_delay_arg *);
void spinlock_enter(void);
void spinlock_exit(void);
void witness_init(struct lock_object *, const char *);
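
To make the SYSINIT scaling concrete with a worked example (assuming, purely
for illustration, an 8-CPU machine): each lock class would boot with
initial = 200, min = 40 and max = 2000, while step keeps its static default
of 500. A thread that repeatedly loses the race then draws its first backoff
from cpu_ticks() % 200 (clamped to at least 40 cpu_spinwait() iterations),
and the window grows to 700, 1200, 1700 and finally caps at 2000 on later
passes. All four parameters remain tunable at runtime via the new sysctls,
e.g. debug.mtx.delay_initial, debug.rwlock.delay_initial and
debug.sx.delay_initial.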
_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"