In pv_wait_head_or_lock(), if there is a spurious wakeup and the vCPU fails to
get the lock because of lock stealing, then after a short spin we need to hash
the lock again and enter pv_wait() to yield.
Currently, after a spurious wakeup, since l->locked is not _Q_SLOW_VAL,
pv_wait() might do nothing and return immediately. That is not
paravirt-friendly, because pv_wait_head_or_lock() will then just spin on the
lock.

Signed-off-by: Pan Xinhui <xinhui....@linux.vnet.ibm.com>
---
 kernel/locking/qspinlock_paravirt.h | 39 +++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 2bbffe4..3482ce9 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -429,14 +429,15 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 		return;
 
 	/*
-	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
-	 *
-	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
-	 * the hash table later on at unlock time, no atomic instruction is
-	 * needed.
+	 * Put the lock into the hash table and set the _Q_SLOW_VAL later.
 	 */
-	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
 	(void)pv_hash(lock, pn);
+
+	/*
+	 * Match the smp_load_acquire() in pv_wait_head_or_lock().
+	 * We must set the _Q_SLOW_VAL after the hash.
+	 */
+	smp_store_release(&l->locked, _Q_SLOW_VAL);
 }
 
 /*
@@ -454,6 +455,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 	struct qspinlock **lp = NULL;
 	int waitcnt = 0;
 	int loop;
+	u8 lock_val;
 
 	/*
 	 * If pv_kick_node() already advanced our state, we don't need to
@@ -487,7 +489,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 
 		clear_pending(lock);
 
-		if (!lp) { /* ONCE */
+		if (!lp) {
 			lp = pv_hash(lock, pn);
 
 			/*
@@ -517,6 +519,13 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 		qstat_inc(qstat_pv_wait_again, waitcnt);
 
 		/*
+		 * Make sure pv_kick_node() has hashed the lock, so that after
+		 * pv_wait(), if ->locked is not _Q_SLOW_VAL, we can hash it again.
+		 */
+		if (lp == (struct qspinlock **)1 &&
+		    smp_load_acquire(&l->locked) == _Q_SLOW_VAL)
+			lp = (struct qspinlock **)2;
+		/*
 		 * Pass in the previous node vCPU number which is likely to be
 		 * the lock holder vCPU. This additional information may help
 		 * the hypervisor to give more resource to that vCPU so that
@@ -525,13 +534,27 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 		 */
 		pv_wait(&l->locked, _Q_SLOW_VAL, pn->prev_cpu);
 
+		lock_val = READ_ONCE(l->locked);
+
+		/* If ->locked is zero, the lock owner has unhashed the lock.
+		 * If ->locked is _Q_LOCKED_VAL, either:
+		 *  1) pv_kick_node() did not hash the lock, and lp != 0x1;
+		 *  2) the lock was stolen and its owner has unhashed it too;
+		 *  3) we raced with pv_kick_node(); if lp == 2, it has hashed
+		 *     the lock, and the lock was unhashed in unlock().
+		 * If ->locked is _Q_SLOW_VAL, this is a spurious wakeup.
+		 */
+		if (lock_val != _Q_SLOW_VAL) {
+			if (lock_val == 0 || lp != (struct qspinlock **)1)
+				lp = 0;
+		}
 		/*
 		 * The unlocker should have freed the lock before kicking the
 		 * CPU. So if the lock is still not free, it is a spurious
 		 * wakeup or another vCPU has stolen the lock. The current
 		 * vCPU should spin again.
 		 */
-		qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked));
+		qstat_inc(qstat_pv_spurious_wakeup, lock_val);
 	}
 
 	/*
--
1.9.1
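
For readers following the new ->locked handling, below is a minimal, standalone
userspace sketch (plain C, not kernel code) of the decision the patch makes
after pv_wait() returns. The names LOCKED_VAL, SLOW_VAL, hash_state and
must_rehash() are hypothetical stand-ins; in the patch itself the same roles
are played by _Q_LOCKED_VAL, _Q_SLOW_VAL, lock_val and the lp sentinel values
(struct qspinlock **)1 and (struct qspinlock **)2.

#include <stdio.h>

/*
 * Simplified models of the ->locked byte values; in the kernel these are
 * _Q_LOCKED_VAL and _Q_SLOW_VAL.
 */
#define LOCKED_VAL	0x01	/* lock held (possibly re-taken by a stealer) */
#define SLOW_VAL	0x03	/* lock held and hashed; slow unlock path     */

/*
 * Simplified model of the lp values used in pv_wait_head_or_lock(): a real
 * hash-table slot pointer, (struct qspinlock **)1, or (struct qspinlock **)2.
 */
enum hash_state {
	SELF_HASHED,		/* lp is a real slot pointer: we hashed it    */
	KICKER_PENDING,		/* lp == 1: pv_kick_node() will hash for us   */
	KICKER_CONFIRMED,	/* lp == 2: we saw _Q_SLOW_VAL, hash is done  */
};

/*
 * Return 1 when the waiter must drop its hash state so the next loop
 * iteration hashes the lock again before calling pv_wait(); this mirrors
 * the patch's "if (lock_val != _Q_SLOW_VAL)" block.
 */
static int must_rehash(unsigned char locked, enum hash_state hs)
{
	if (locked == SLOW_VAL)
		return 0;	/* still hashed: spurious wakeup, wait again */

	/*
	 * locked == 0: the unlocker found and removed our hash entry.
	 * locked == LOCKED_VAL with hs != KICKER_PENDING: the entry we (or a
	 * confirmed kicker) installed was consumed by an unlock, and the
	 * lock has since been re-taken, e.g. by a stealer.
	 */
	return locked == 0 || hs != KICKER_PENDING;
}

int main(void)
{
	printf("SLOW_VAL,  kicker pending   -> rehash=%d\n",
	       must_rehash(SLOW_VAL, KICKER_PENDING));
	printf("unlocked,  self hashed      -> rehash=%d\n",
	       must_rehash(0, SELF_HASHED));
	printf("re-locked, kicker confirmed -> rehash=%d\n",
	       must_rehash(LOCKED_VAL, KICKER_CONFIRMED));
	printf("re-locked, kicker pending   -> rehash=%d\n",
	       must_rehash(LOCKED_VAL, KICKER_PENDING));
	return 0;
}

In the patch, a "rehash" outcome corresponds to resetting lp to 0 so that the
if (!lp) branch installs a fresh hash entry on the next loop iteration; the
one case that keeps lp untouched is ->locked == _Q_LOCKED_VAL with lp == 1,
where pv_kick_node() may simply not have hashed the lock yet.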