This makes trylock slightly stronger, and it also provides most of the benefit of passing 'val' back through to the slowpath, without the added complexity.
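[ Illustrative aside, not part of the patch: a rough C-level sketch of what
  the new stealing trylock below does, assuming lock->val is a plain u32 as
  the READ_ONCE() accessors in this header imply. The helper name is made
  up, and cmpxchg_acquire() stands in for the patch's hand-coded larx/stcx.
  retry loop.

static __always_inline int trylock_steal_sketch(struct qspinlock *lock)
{
	u32 old = READ_ONCE(lock->val);

	/*
	 * Fail only if the lock is held (locked/owner bits set). Queued
	 * waiters encoded in _Q_TAIL_CPU_MASK do not prevent the steal.
	 */
	if (old & ~_Q_TAIL_CPU_MASK)
		return 0;

	/* Keep the queue tail, set our locked/owner encoding. */
	return cmpxchg_acquire(&lock->val, old,
			       (old & _Q_TAIL_CPU_MASK) |
			       queued_spin_encode_locked_val()) == old;
}

  The nosteal variant below differs only in requiring the whole lock word
  to be zero, so its trylock fails whenever any CPU is queued. ]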
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/include/asm/qspinlock.h | 44 +++++++++++++++++++++++++++-
 arch/powerpc/lib/qspinlock.c         |  9 ++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 3eff2d875bb6..56638175e49b 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -5,6 +5,15 @@
 #include <linux/compiler.h>
 #include <asm/qspinlock_types.h>
 
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * might make spin locks slightly more efficient when stealing.
+ *
+ * This is compile-time, so if true then there may always be stealers, so the
+ * nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
+
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	return READ_ONCE(lock->val);
@@ -26,11 +35,12 @@ static __always_inline u32 queued_spin_encode_locked_val(void)
 	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
 }
 
-static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
 {
 	u32 new = queued_spin_encode_locked_val();
 	u32 prev;
 
+	/* Trylock succeeds only when unlocked and no queued nodes */
 	asm volatile(
 "1:	lwarx	%0,0,%1,%3	# queued_spin_trylock			\n"
 "	cmpwi	0,%0,0							\n"
@@ -47,6 +57,38 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 	return likely(prev == 0);
 }
 
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
+{
+	u32 new = queued_spin_encode_locked_val();
+	u32 prev, tmp;
+
+	/* Trylock may get ahead of queued nodes if it finds unlocked */
+	asm volatile(
+"1:	lwarx	%0,0,%2,%5	# queued_spin_trylock			\n"
+"	andc.	%1,%0,%4						\n"
+"	bne-	2f							\n"
+"	and	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t" PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev), "=&r" (tmp)
+	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
+}
+
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+	if (!_Q_SPIN_TRY_LOCK_STEAL)
+		return __queued_spin_trylock_nosteal(lock);
+	else
+		return __queued_spin_trylock_steal(lock);
+}
+
 void queued_spin_lock_slowpath(struct qspinlock *lock);
 
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 51123240da8e..830a90a66f5f 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -24,7 +24,11 @@ struct qnodes {
 
 /* Tuning parameters */
 static int steal_spins __read_mostly = (1<<5);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
 static bool maybe_stealers __read_mostly = true;
+#endif
 static int head_spins __read_mostly = (1<<8);
 
 static bool pv_yield_owner __read_mostly = true;
@@ -527,6 +531,10 @@ void pv_spinlocks_init(void)
 #include <linux/debugfs.h>
 static int steal_spins_set(void *data, u64 val)
 {
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+	/* MAYBE_STEAL remains true */
+	steal_spins = val;
+#else
 	static DEFINE_MUTEX(lock);
 
 	/*
@@ -551,6 +559,7 @@ static int steal_spins_set(void *data, u64 val)
 		steal_spins = val;
 	}
 	mutex_unlock(&lock);
+#endif
 
 	return 0;
 }
-- 
2.37.2