On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:

[resend as utf-8, not utf-7]

> Store the owner CPU number in the lock word so it may be yielded to,
> as powerpc's paravirtualised simple spinlocks do.
> ---
>  arch/powerpc/include/asm/qspinlock.h       |  8 +++++++-
>  arch/powerpc/include/asm/qspinlock_types.h | 10 ++++++++++
>  arch/powerpc/lib/qspinlock.c               |  6 +++---
>  3 files changed, 20 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
> index 3ab354159e5e..44601b261e08 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -20,9 +20,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
>  	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
>  }
>
> +static __always_inline u32 queued_spin_get_locked_val(void)

Maybe this function should have "encode" in the name to match with
encode_tail_cpu().

> +{
> +	/* XXX: make this use lock value in paca like simple spinlocks? */

Is that the paca's lock_token which is 0x8000?

> +	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
> +}
> +
>  static __always_inline int queued_spin_trylock(struct qspinlock *lock)
>  {
> -	u32 new = _Q_LOCKED_VAL;
> +	u32 new = queued_spin_get_locked_val();
>  	u32 prev;
>
>  	asm volatile(
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
> index 8b20f5e22bba..35f9525381e6 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -29,6 +29,8 @@ typedef struct qspinlock {
>   * Bitfields in the lock word:
>   *
>   *     0: locked bit
> + *  1-14: lock holder cpu
> + *    15: unused bit
>   *    16: must queue bit
>   * 17-31: tail cpu (+1)

So there is one more bit to store the tail cpu vs the lock holder cpu?

>   */
> @@ -39,6 +41,14 @@ typedef struct qspinlock {
>  #define _Q_LOCKED_MASK		_Q_SET_MASK(LOCKED)
>  #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
>
> +#define _Q_OWNER_CPU_OFFSET	1
> +#define _Q_OWNER_CPU_BITS	14
> +#define _Q_OWNER_CPU_MASK	_Q_SET_MASK(OWNER_CPU)
> +
> +#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
> +#error "qspinlock does not support such large CONFIG_NR_CPUS"
> +#endif
> +
>  #define _Q_MUST_Q_OFFSET	16
>  #define _Q_MUST_Q_BITS		1
>  #define _Q_MUST_Q_MASK		_Q_SET_MASK(MUST_Q)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index a906cc8f15fa..aa26cfe21f18 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -50,7 +50,7 @@ static inline int get_tail_cpu(u32 val)
>  /* Take the lock by setting the lock bit, no other CPUs will touch it. */
>  static __always_inline void lock_set_locked(struct qspinlock *lock)
>  {
> -	u32 new = _Q_LOCKED_VAL;
> +	u32 new = queued_spin_get_locked_val();
>  	u32 prev;
>
>  	asm volatile(
> @@ -68,7 +68,7 @@ static __always_inline void lock_set_locked(struct qspinlock *lock)
>  /* Take lock, clearing tail, cmpxchg with old (which must not be locked) */
>  static __always_inline int trylock_clear_tail_cpu(struct qspinlock *lock, u32 old)
>  {
> -	u32 new = _Q_LOCKED_VAL;
> +	u32 new = queued_spin_get_locked_val();
>  	u32 prev;
>
>  	BUG_ON(old & _Q_LOCKED_VAL);
> @@ -116,7 +116,7 @@ static __always_inline u32 __trylock_cmpxchg(struct qspinlock *lock, u32 old, u3
>  /* Take lock, preserving tail, cmpxchg with val (which must not be locked) */
>  static __always_inline int trylock_with_tail_cpu(struct qspinlock *lock, u32 val)
>  {
> -	u32 newval = _Q_LOCKED_VAL | (val & _Q_TAIL_CPU_MASK);
> +	u32 newval = queued_spin_get_locked_val() | (val & _Q_TAIL_CPU_MASK);
>
>  	if (__trylock_cmpxchg(lock, val, newval) == val)
>  		return 1;
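
On the bit widths above: if I am reading the layout right, the lock holder
cpu is stored directly in bits 1-14, while the tail cpu is stored +1 so a
zero tail field can mean "no queue", which is presumably why the tail needs
the extra 15th bit to still cover CONFIG_NR_CPUS up to 1 << 14. For my own
understanding, decoding the owner would then just be the reverse of
queued_spin_get_locked_val(), something like the sketch below. It only uses
the masks added by this patch, but the get_owner_cpu() helper name is mine,
not something from the series:

/*
 * Sketch only: recover the lock holder cpu that
 * queued_spin_get_locked_val() encoded into the lock word.
 * Only meaningful while _Q_LOCKED_VAL is set.
 */
static inline int get_owner_cpu(u32 val)
{
	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
}

I would expect the later yield-to-owner path mentioned in the changelog to
want something along those lines when it needs a cpu number to yield to.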
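
On the XXX comment about using the lock value in the paca: for the simple
spinlocks, lock_token is the constant 0x8000 kept next to paca_index in the
paca, so the locked token can be loaded straight from the paca rather than
computed from smp_processor_id() at lock time. If the idea here is the same,
I imagine it would look roughly like the sketch below; the qspinlock_token
field is made up for illustration and does not exist in the paca today:

/*
 * Hypothetical only: pre-encode the locked value once per cpu at paca
 * init, e.g.
 *	paca->qspinlock_token = _Q_LOCKED_VAL | (cpu << _Q_OWNER_CPU_OFFSET);
 * so the lock fast path becomes a single load from the paca.
 */
static __always_inline u32 queued_spin_get_locked_val(void)
{
	return local_paca->qspinlock_token;	/* made-up field */
}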