On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote: [resend as utf-8, not utf-7] > The first 16 bits of the lock are only modified by the owner, and other > modifications always use atomic operations on the entire 32 bits, so > unlocks can use plain stores on the 16 bits. This is the same kind of > optimisation done by core qspinlock code. > --- > arch/powerpc/include/asm/qspinlock.h | 6 +----- > arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++-- > 2 files changed, 18 insertions(+), 7 deletions(-) > > diff --git a/arch/powerpc/include/asm/qspinlock.h > b/arch/powerpc/include/asm/qspinlock.h > index f06117aa60e1..79a1936fb68d 100644 > --- a/arch/powerpc/include/asm/qspinlock.h > +++ b/arch/powerpc/include/asm/qspinlock.h > @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct > qspinlock *lock) > > static inline void queued_spin_unlock(struct qspinlock *lock) > { > - for (;;) { > - int val = atomic_read(&lock->val); > - if (atomic_cmpxchg_release(&lock->val, val, val & > ~_Q_LOCKED_VAL) == val) > - return; > - } > + smp_store_release(&lock->locked, 0);
Is it also possible for lock_set_locked() to use a non-atomic acquire operation? > } > > #define arch_spin_is_locked(l) queued_spin_is_locked(l) > diff --git a/arch/powerpc/include/asm/qspinlock_types.h > b/arch/powerpc/include/asm/qspinlock_types.h > index 9630e714c70d..3425dab42576 100644 > --- a/arch/powerpc/include/asm/qspinlock_types.h > +++ b/arch/powerpc/include/asm/qspinlock_types.h > @@ -3,12 +3,27 @@ > #define _ASM_POWERPC_QSPINLOCK_TYPES_H > > #include <linux/types.h> > +#include <asm/byteorder.h> > > typedef struct qspinlock { > - atomic_t val; > + union { > + atomic_t val; > + > +#ifdef __LITTLE_ENDIAN > + struct { > + u16 locked; > + u8 reserved[2]; > + }; > +#else > + struct { > + u8 reserved[2]; > + u16 locked; > + }; > +#endif > + }; > } arch_spinlock_t; Just to double check we have: #define _Q_LOCKED_OFFSET 0 #define _Q_LOCKED_BITS 1 #define _Q_LOCKED_MASK 0x00000001 #define _Q_LOCKED_VAL 1 #define _Q_TAIL_CPU_OFFSET 16 #define _Q_TAIL_CPU_BITS 16 #define _Q_TAIL_CPU_MASK 0xffff0000 so the ordering here looks correct. > > -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } > +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } > > /* > * Bitfields in the atomic value: