On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> Provide an option that holds off queueing indefinitely while the lock
> owner is preempted. This could reduce queueing latencies for very
> overcommitted vcpu situations.
>
> This is disabled by default.
> ---
>  arch/powerpc/lib/qspinlock.c | 91 +++++++++++++++++++++++++++++++-----
>  1 file changed, 79 insertions(+), 12 deletions(-)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 24f68bd71e2b..5cfd69931e31 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -35,6 +35,7 @@ static int HEAD_SPINS __read_mostly = (1<<8);
>
>  static bool pv_yield_owner __read_mostly = true;
>  static bool pv_yield_allow_steal __read_mostly = false;
> +static bool pv_spin_on_preempted_owner __read_mostly = false;
>  static bool pv_yield_prev __read_mostly = true;
>  static bool pv_yield_propagate_owner __read_mostly = true;
>  static bool pv_prod_head __read_mostly = false;
> @@ -220,13 +221,15 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
>          BUG();
>  }
>
> -static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
> +static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
>  {
>          int owner;
>          u32 yield_count;
>
>          BUG_ON(!(val & _Q_LOCKED_VAL));
>
> +        *preempted = false;
> +
>          if (!paravirt)
>                  goto relax;
>
> @@ -241,6 +244,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
>
>          spin_end();
>
> +        *preempted = true;
> +
>          /*
>           * Read the lock word after sampling the yield count. On the other side
>           * there may a wmb because the yield count update is done by the
> @@ -265,14 +270,14 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
>          spin_cpu_relax();
>  }
>
> -static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
> +static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool *preempted)

It seems like the preempted parameter could be the return value of
yield_to_locked_owner(). Then callers that don't use the value returned
in preempted wouldn't need to create an unnecessary variable to pass in
(see the sketch at the end of this mail).

> {
> -        __yield_to_locked_owner(lock, val, paravirt, false);
> +        __yield_to_locked_owner(lock, val, paravirt, false, preempted);
>  }
>
> -static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
> +static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq, bool *preempted)
>  {
> -        __yield_to_locked_owner(lock, val, paravirt, clear_mustq);
> +        __yield_to_locked_owner(lock, val, paravirt, clear_mustq, preempted);
>  }
>
>  static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
> @@ -364,12 +369,33 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
>
>  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
>  {
> -        int iters;
> +        int iters = 0;
> +
> +        if (!STEAL_SPINS) {
> +                if (paravirt && pv_spin_on_preempted_owner) {
> +                        spin_begin();
> +                        for (;;) {
> +                                u32 val = READ_ONCE(lock->val);
> +                                bool preempted;
> +
> +                                if (val & _Q_MUST_Q_VAL)
> +                                        break;
> +                                if (!(val & _Q_LOCKED_VAL))
> +                                        break;
> +                                if (!vcpu_is_preempted(get_owner_cpu(val)))
> +                                        break;
> +                                yield_to_locked_owner(lock, val, paravirt, &preempted);
> +                        }
> +                        spin_end();
> +                }
> +                return false;
> +        }
>
>          /* Attempt to steal the lock */
>          spin_begin();
>          for (;;) {
>                  u32 val = READ_ONCE(lock->val);
> +                bool preempted;
>
>                  if (val & _Q_MUST_Q_VAL)
>                          break;
> @@ -382,9 +408,22 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
>                          continue;
>                  }
>
> -                yield_to_locked_owner(lock, val, paravirt);
> -
> -                iters++;
> +                yield_to_locked_owner(lock, val, paravirt, &preempted);
> +
> +                if (paravirt && preempted) {
> +                        if (!pv_spin_on_preempted_owner)
> +                                iters++;
> +                        /*
> +                         * pv_spin_on_preempted_owner don't increase iters
> +                         * while the owner is preempted -- we won't interfere
> +                         * with it by definition. This could introduce some
> +                         * latency issue if we continually observe preempted
> +                         * owners, but hopefully that's a rare corner case of
> +                         * a badly oversubscribed system.
> +                         */
> +                } else {
> +                        iters++;
> +                }
>
>                  if (iters >= get_steal_spins(paravirt, false))
>                          break;
> @@ -463,8 +502,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>                  /* We're at the head of the waitqueue, wait for the lock. */
>                  spin_begin();
>                  while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> +                        bool preempted;
> +
>                          propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
> -                        yield_head_to_locked_owner(lock, val, paravirt, false);
> +                        yield_head_to_locked_owner(lock, val, paravirt, false, &preempted);
>                  }
>                  spin_end();
>
> @@ -486,11 +527,20 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>                  /* We're at the head of the waitqueue, wait for the lock. */
>                  spin_begin();
>                  while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> +                        bool preempted;
> +
>                          propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
>                          yield_head_to_locked_owner(lock, val, paravirt,
> -                                        pv_yield_allow_steal && set_mustq);
> +                                        pv_yield_allow_steal && set_mustq,
> +                                        &preempted);
> +
> +                        if (paravirt && preempted) {
> +                                if (!pv_spin_on_preempted_owner)
> +                                        iters++;
> +                        } else {
> +                                iters++;
> +                        }
>
> -                        iters++;
>                          if (!set_mustq && iters >= get_head_spins(paravirt)) {
>                                  set_mustq = true;
>                                  lock_set_mustq(lock);
> @@ -663,6 +713,22 @@ static int pv_yield_allow_steal_get(void *data, u64 *val)
>
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
>
> +static int pv_spin_on_preempted_owner_set(void *data, u64 val)
> +{
> +        pv_spin_on_preempted_owner = !!val;
> +
> +        return 0;
> +}
> +
> +static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
> +{
> +        *val = pv_spin_on_preempted_owner;
> +
> +        return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
> +
>  static int pv_yield_prev_set(void *data, u64 val)
>  {
>          pv_yield_prev = !!val;
> @@ -719,6 +785,7 @@ static __init int spinlock_debugfs_init(void)
>          if (is_shared_processor()) {
>                  debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
>                  debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
> +                debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
>                  debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
>                  debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
>                  debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
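
To make that suggestion concrete, here is the rough shape I had in mind.
It is untested and only shows the two wrappers; __yield_to_locked_owner()
itself would return true on the path that currently does
"*preempted = true;" and false otherwise:

static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
{
        /* Report whether the owner was seen preempted while we yielded. */
        return __yield_to_locked_owner(lock, val, paravirt, false);
}

static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool clear_mustq)
{
        return __yield_to_locked_owner(lock, val, paravirt, clear_mustq);
}

Callers that care about the preemption status would then assign the return
value, e.g. in try_to_steal_lock():

                preempted = yield_to_locked_owner(lock, val, paravirt);

and callers that don't (such as the first head-of-queue wait loop above)
could simply ignore it rather than declaring an otherwise unused preempted
variable.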