Provide an option to hold off queueing indefinitely while the lock owner is preempted. This could reduce queueing latencies in heavily vcpu-overcommitted situations.
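In outline, spins that merely observe a preempted owner no longer count against the spin limits when the option is enabled. A simplified sketch of the stealing loop after this change (illustrative only; the MUST_Q check, the actual steal attempt and the head-of-queue variant are elided -- see the diff for the real code):

	for (;;) {
		u32 val = READ_ONCE(lock->val);
		bool preempted;

		if (!(val & _Q_LOCKED_VAL))
			break;	/* lock was released, retry acquisition */

		yield_to_locked_owner(lock, val, paravirt, &preempted);

		/*
		 * Waiting on a preempted owner does not interfere with
		 * it, so such iterations are "free" and do not count
		 * toward the queueing threshold.
		 */
		if (!(paravirt && preempted && pv_spin_on_preempted_owner))
			iters++;

		if (iters >= get_steal_spins(paravirt, false))
			break;	/* give up spinning and queue */
	}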
This is disabled by default.
---
 arch/powerpc/lib/qspinlock.c | 91 +++++++++++++++++++++++++++++++-----
 1 file changed, 79 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 24f68bd71e2b..5cfd69931e31 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -35,6 +35,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
 
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
 static bool pv_yield_propagate_owner __read_mostly = true;
 static bool pv_prod_head __read_mostly = false;
@@ -220,13 +221,15 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
-static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
+static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
 {
 	int owner;
 	u32 yield_count;
 
 	BUG_ON(!(val & _Q_LOCKED_VAL));
 
+	*preempted = false;
+
 	if (!paravirt)
 		goto relax;
 
@@ -241,6 +244,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 
 	spin_end();
 
+	*preempted = true;
+
 	/*
 	 * Read the lock word after sampling the yield count. On the other side
 	 * there may be a wmb because the yield count update is done by the
@@ -265,14 +270,14 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 	spin_cpu_relax();
 }
 
-static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool *preempted)
 {
-	__yield_to_locked_owner(lock, val, paravirt, false);
+	__yield_to_locked_owner(lock, val, paravirt, false, preempted);
 }
 
-static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
+static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
 {
-	__yield_to_locked_owner(lock, val, paravirt, clear_mustq);
+	__yield_to_locked_owner(lock, val, paravirt, clear_mustq, preempted);
 }
 
 static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
@@ -364,12 +369,33 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
-	int iters;
+	int iters = 0;
+
+	if (!STEAL_SPINS) {
+		if (paravirt && pv_spin_on_preempted_owner) {
+			spin_begin();
+			for (;;) {
+				u32 val = READ_ONCE(lock->val);
+				bool preempted;
+
+				if (val & _Q_MUST_Q_VAL)
+					break;
+				if (!(val & _Q_LOCKED_VAL))
+					break;
+				if (!vcpu_is_preempted(get_owner_cpu(val)))
+					break;
+				yield_to_locked_owner(lock, val, paravirt, &preempted);
+			}
+			spin_end();
+		}
+		return false;
+	}
 
 	/* Attempt to steal the lock */
 	spin_begin();
 	for (;;) {
 		u32 val = READ_ONCE(lock->val);
+		bool preempted;
 
 		if (val & _Q_MUST_Q_VAL)
 			break;
@@ -382,9 +408,22 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 			continue;
 		}
 
-		yield_to_locked_owner(lock, val, paravirt);
-
-		iters++;
+		yield_to_locked_owner(lock, val, paravirt, &preempted);
+
+		if (paravirt && preempted) {
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+			/*
+			 * pv_spin_on_preempted_owner doesn't increase iters
+			 * while the owner is preempted -- we won't interfere
+			 * with it by definition. This could introduce some
+			 * latency issue if we continually observe preempted
+			 * owners, but hopefully that's a rare corner case of
+			 * a badly oversubscribed system.
+			 */
+		} else {
+			iters++;
+		}
 
 		if (iters >= get_steal_spins(paravirt, false))
 			break;
@@ -463,8 +502,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		/* We're at the head of the waitqueue, wait for the lock. */
 		spin_begin();
 		while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
+			bool preempted;
+
 			propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
-			yield_head_to_locked_owner(lock, val, paravirt, false);
+			yield_head_to_locked_owner(lock, val, paravirt, false, &preempted);
 		}
 		spin_end();
 
@@ -486,11 +527,20 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	/* We're at the head of the waitqueue, wait for the lock. */
 	spin_begin();
 	while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
+		bool preempted;
+
 		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
 		yield_head_to_locked_owner(lock, val, paravirt,
-				pv_yield_allow_steal && set_mustq);
+				pv_yield_allow_steal && set_mustq,
+				&preempted);
+
+		if (paravirt && preempted) {
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+		} else {
+			iters++;
+		}
 
-		iters++;
 		if (!set_mustq && iters >= get_head_spins(paravirt)) {
 			set_mustq = true;
 			lock_set_mustq(lock);
@@ -663,6 +713,22 @@ static int pv_yield_allow_steal_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
 
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+	pv_spin_on_preempted_owner = !!val;
+
+	return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+	*val = pv_spin_on_preempted_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
 static int pv_yield_prev_set(void *data, u64 val)
 {
 	pv_yield_prev = !!val;
@@ -719,6 +785,7 @@ static __init int spinlock_debugfs_init(void)
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
-- 
2.35.1
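
For reference, the new control is a runtime debugfs knob like the existing qspl_* files. A minimal user-space sketch of enabling it (an illustration, not part of the patch; it assumes debugfs is mounted at /sys/kernel/debug, where arch_debugfs_dir appears as the "powerpc" directory):

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Path assumes the usual powerpc arch_debugfs_dir location. */
		const char *path =
			"/sys/kernel/debug/powerpc/qspl_pv_spin_on_preempted_owner";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* "1" enables indefinite spinning on a preempted owner. */
		if (write(fd, "1", 1) != 1) {
			perror("write");
			close(fd);
			return 1;
		}
		close(fd);
		return 0;
	}

Writing 0 restores the default queueing behaviour.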