On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:

[resend as utf-8, not utf-7]

> Queued waiters which are not at the head of the queue don't spin on
> the lock word but their qnode lock word, waiting for the previous queued
> CPU to release them. Add an option which allows these waiters to yield
> to the previous CPU if its vCPU is preempted.
>
> Disable this option by default for now, i.e., no logical change.
> ---
>  arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 45 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index 55286ac91da5..b39f8c5b329c 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -26,6 +26,7 @@ static bool MAYBE_STEALERS __read_mostly = true;
>  static int HEAD_SPINS __read_mostly = (1<<8);
>
>  static bool pv_yield_owner __read_mostly = true;
> +static bool pv_yield_prev __read_mostly = true;
Similar suggestion: maybe pv_yield_prev_enabled would read better.

Isn't this enabled by default, contrary to the commit message?

>
>  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
>
> @@ -224,6 +225,31 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
>  		cpu_relax();
>  }
>
> +static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)

yield_to_locked_owner() takes a raw val and works out the cpu to yield to.
I think for consistency yield_to_prev() should take the raw val and work it
out too (rough sketch of what I mean at the end of this mail).

> +{
> +	u32 yield_count;
> +
> +	if (!paravirt)
> +		goto relax;
> +
> +	if (!pv_yield_prev)
> +		goto relax;
> +
> +	yield_count = yield_count_of(prev_cpu);
> +	if ((yield_count & 1) == 0)
> +		goto relax; /* owner vcpu is running */
> +
> +	smp_rmb(); /* See yield_to_locked_owner comment */
> +
> +	if (!node->locked) {
> +		yield_to_preempted(prev_cpu, yield_count);
> +		return;
> +	}
> +
> +relax:
> +	cpu_relax();
> +}
> +
>
>  static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
>  {
> @@ -291,13 +317,14 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
>  		 */
>  		if (old & _Q_TAIL_CPU_MASK) {
>  			struct qnode *prev = get_tail_qnode(lock, old);
> +			int prev_cpu = get_tail_cpu(old);

This could then be removed.

>
>  			/* Link @node into the waitqueue. */
>  			WRITE_ONCE(prev->next, node);
>
>  			/* Wait for mcs node lock to be released */
>  			while (!node->locked)
> -				cpu_relax();
> +				yield_to_prev(lock, node, prev_cpu, paravirt);

And would have this as:

	yield_to_prev(lock, node, old, paravirt);

>
>  			smp_rmb(); /* acquire barrier for the mcs lock */
>  		}
> @@ -448,12 +475,29 @@ static int pv_yield_owner_get(void *data, u64 *val)
>
>  DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
>
> +static int pv_yield_prev_set(void *data, u64 val)
> +{
> +	pv_yield_prev = !!val;
> +
> +	return 0;
> +}
> +
> +static int pv_yield_prev_get(void *data, u64 *val)
> +{
> +	*val = pv_yield_prev;
> +
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
> +
>  static __init int spinlock_debugfs_init(void)
>  {
>  	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
>  	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
>  	if (is_shared_processor()) {
>  		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
> +		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
>  	}
>
>  	return 0;
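For reference, a rough (untested) sketch of what I mean: the function from
the patch, just reshuffled so it takes the raw lock word and derives
prev_cpu itself with get_tail_cpu(), the same way yield_to_locked_owner()
works out the owner cpu from val.

/* Untested sketch for illustration, not part of the patch. */
static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
	/* Work out the previous queued CPU from the tail bits, as the call site did. */
	int prev_cpu = get_tail_cpu(val);
	u32 yield_count;

	if (!paravirt)
		goto relax;

	if (!pv_yield_prev)
		goto relax;

	yield_count = yield_count_of(prev_cpu);
	if ((yield_count & 1) == 0)
		goto relax; /* owner vcpu is running */

	smp_rmb(); /* See yield_to_locked_owner comment */

	if (!node->locked) {
		yield_to_preempted(prev_cpu, yield_count);
		return;
	}

relax:
	cpu_relax();
}

The call site then just passes the lock word it already has, i.e.
yield_to_prev(lock, node, old, paravirt), and the extra prev_cpu local in
queued_spin_lock_mcs_queue() goes away.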