On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
[resend as utf-8, not utf-7]
> The first 16 bits of the lock are only modified by the owner, and other
> modifications always use atomic operations on the entire 32 bits, so
> unlocks can use plain stores on the 16 bits. This is the same kind of
> optimisation done by core qspinlock code.
> ---
>  arch/powerpc/include/asm/qspinlock.h       |  6 +-----
>  arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++--
>  2 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/qspinlock.h 
> b/arch/powerpc/include/asm/qspinlock.h
> index f06117aa60e1..79a1936fb68d 100644
> --- a/arch/powerpc/include/asm/qspinlock.h
> +++ b/arch/powerpc/include/asm/qspinlock.h
> @@ -38,11 +38,7 @@ static __always_inline void queued_spin_lock(struct 
> qspinlock *lock)
>  
>  static inline void queued_spin_unlock(struct qspinlock *lock)
>  {
> -     for (;;) {
> -             int val = atomic_read(&lock->val);
> -             if (atomic_cmpxchg_release(&lock->val, val, val & 
> ~_Q_LOCKED_VAL) == val)
> -                     return;
> -     }
> +     smp_store_release(&lock->locked, 0);

Is it also possible for lock_set_locked() to use a non-atomic acquire
operation?

>  }
>  
>  #define arch_spin_is_locked(l)               queued_spin_is_locked(l)
> diff --git a/arch/powerpc/include/asm/qspinlock_types.h 
> b/arch/powerpc/include/asm/qspinlock_types.h
> index 9630e714c70d..3425dab42576 100644
> --- a/arch/powerpc/include/asm/qspinlock_types.h
> +++ b/arch/powerpc/include/asm/qspinlock_types.h
> @@ -3,12 +3,27 @@
>  #define _ASM_POWERPC_QSPINLOCK_TYPES_H
>  
>  #include <linux/types.h>
> +#include <asm/byteorder.h>
>  
>  typedef struct qspinlock {
> -     atomic_t val;
> +     union {
> +             atomic_t val;
> +
> +#ifdef __LITTLE_ENDIAN
> +             struct {
> +                     u16     locked;
> +                     u8      reserved[2];
> +             };
> +#else
> +             struct {
> +                     u8      reserved[2];
> +                     u16     locked;
> +             };
> +#endif
> +     };
>  } arch_spinlock_t;

Just to double check we have:

#define _Q_LOCKED_OFFSET        0
#define _Q_LOCKED_BITS          1
#define _Q_LOCKED_MASK          0x00000001
#define _Q_LOCKED_VAL           1

#define _Q_TAIL_CPU_OFFSET      16
#define _Q_TAIL_CPU_BITS        16
#define _Q_TAIL_CPU_MASK        0xffff0000


so the ordering here looks correct.

>  
> -#define      __ARCH_SPIN_LOCK_UNLOCKED       { .val = ATOMIC_INIT(0) }
> +#define      __ARCH_SPIN_LOCK_UNLOCKED       { { .val = ATOMIC_INIT(0) } }
>  
>  /*
>   * Bitfields in the atomic value:

Reply via email to