On Thu Nov 10, 2022 at 10:40 AM AEST, Jordan Niethe wrote:
> On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> [resend as utf-8, not utf-7]
> > Give the queue head the ability to stop stealers. After a number of
> > spins without successfully acquiring the lock, the queue head employs
> > this, which will ensure it is the next owner.
> > ---
> >  arch/powerpc/include/asm/qspinlock_types.h | 10 +++-
> >  arch/powerpc/lib/qspinlock.c               | 56 +++++++++++++++++++++-
> >  2 files changed, 63 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
> > index 210adf05b235..8b20f5e22bba 100644
> > --- a/arch/powerpc/include/asm/qspinlock_types.h
> > +++ b/arch/powerpc/include/asm/qspinlock_types.h
> > @@ -29,7 +29,8 @@ typedef struct qspinlock {
> >   * Bitfields in the lock word:
> >   *
> >   *     0: locked bit
> > - * 16-31: tail cpu (+1)
> > + *    16: must queue bit
> > + * 17-31: tail cpu (+1)
> >   */
> >  #define    _Q_SET_MASK(type)       (((1U << _Q_ ## type ## _BITS) - 1)\
> >                                   << _Q_ ## type ## _OFFSET)
> > @@ -38,7 +39,12 @@ typedef struct qspinlock {
> >  #define _Q_LOCKED_MASK             _Q_SET_MASK(LOCKED)
> >  #define _Q_LOCKED_VAL              (1U << _Q_LOCKED_OFFSET)
> >  
> > -#define _Q_TAIL_CPU_OFFSET 16
> > +#define _Q_MUST_Q_OFFSET   16
> > +#define _Q_MUST_Q_BITS             1
> > +#define _Q_MUST_Q_MASK             _Q_SET_MASK(MUST_Q)
> > +#define _Q_MUST_Q_VAL              (1U << _Q_MUST_Q_OFFSET)
> > +
> > +#define _Q_TAIL_CPU_OFFSET 17
> >  #define _Q_TAIL_CPU_BITS   (32 - _Q_TAIL_CPU_OFFSET)
> >  #define _Q_TAIL_CPU_MASK   _Q_SET_MASK(TAIL_CPU)
>
> Not a big deal but some of these values could be calculated like in the
> generic version. e.g.
>
>       #define _Q_PENDING_OFFSET       (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)

Yeah, we don't *really* have more than one locked bit though. Haven't
made up my mind about these defines yet.
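
If any of them do end up derived, it would presumably only be the tail
field, since must-queue sits at bit 16 rather than directly after the
locked bit. Roughly (a sketch of the derived form, not what the patch
does):

	#define _Q_MUST_Q_OFFSET	16
	#define _Q_MUST_Q_BITS		1

	#define _Q_TAIL_CPU_OFFSET	(_Q_MUST_Q_OFFSET + _Q_MUST_Q_BITS)
	#define _Q_TAIL_CPU_BITS	(32 - _Q_TAIL_CPU_OFFSET)
	#define _Q_TAIL_CPU_MASK	_Q_SET_MASK(TAIL_CPU)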

> > diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> > index 1625cce714b2..a906cc8f15fa 100644
> > --- a/arch/powerpc/lib/qspinlock.c
> > +++ b/arch/powerpc/lib/qspinlock.c
> > @@ -22,6 +22,7 @@ struct qnodes {
> >  /* Tuning parameters */
> >  static int STEAL_SPINS __read_mostly = (1<<5);
> >  static bool MAYBE_STEALERS __read_mostly = true;
> > +static int HEAD_SPINS __read_mostly = (1<<8);
> >  
> >  static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
> >  
> > @@ -30,6 +31,11 @@ static __always_inline int get_steal_spins(void)
> >     return STEAL_SPINS;
> >  }
> >  
> > +static __always_inline int get_head_spins(void)
> > +{
> > +   return HEAD_SPINS;
> > +}
> > +
> >  static inline u32 encode_tail_cpu(void)
> >  {
> >     return (smp_processor_id() + 1) << _Q_TAIL_CPU_OFFSET;
> > @@ -142,6 +148,23 @@ static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
> >     return prev;
> >  }
> >  
> > +static __always_inline u32 lock_set_mustq(struct qspinlock *lock)
> > +{
> > +   u32 new = _Q_MUST_Q_VAL;
> > +   u32 prev;
> > +
> > +   asm volatile(
> > +"1:        lwarx   %0,0,%1         # lock_set_mustq                        
> > \n"
>
> Is the EH bit not set because we don't hold the lock here?

Right, we're still waiting for it.
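
The hint is only worth passing on a larx that is actually taking the
lock. For comparison, the acquiring side (e.g. the trylock) can pass it
roughly like this (a sketch only; acquire barrier omitted, and EH
enabled only on 64-bit where the hint form exists):

	asm volatile(
"1:	lwarx	%0,0,%1,%3	# trylock, EH hint			\n"
"	cmpwi	0,%0,0							\n"
"	bne-	2f							\n"
"	stwcx.	%2,0,%1							\n"
"	bne-	1b							\n"
"2:									\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (_Q_LOCKED_VAL),
	  "i" (IS_ENABLED(CONFIG_PPC64))
	: "cr0", "memory");
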

> > +"  or      %0,%0,%2                                                \n"
> > +"  stwcx.  %0,0,%1                                                 \n"
> > +"  bne-    1b                                                      \n"
> > +   : "=&r" (prev)
> > +   : "r" (&lock->val), "r" (new)
> > +   : "cr0", "memory");
>
> This is another usage close to the DEFINE_TESTOP() pattern.
>
> > +
> > +   return prev;
> > +}
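
If these ever did get factored, it would presumably be a generator in
the same spirit. Something roughly like this (a sketch with made-up
names, not the actual bitops macro):

	#define DEFINE_LOCK_FETCH_OP(fn, op, eh)			\
	static __always_inline u32 fn(struct qspinlock *lock, u32 mask)\
	{								\
		u32 prev, tmp;						\
									\
		asm volatile(						\
	"1:	lwarx	%0,0,%2,%4	# " #fn "		\n"	\
	"	" #op "	%1,%0,%3				\n"	\
	"	stwcx.	%1,0,%2					\n"	\
	"	bne-	1b					\n"	\
		: "=&r" (prev), "=&r" (tmp)				\
		: "r" (&lock->val), "r" (mask), "i" (eh)		\
		: "cr0", "memory");					\
									\
		return prev;						\
	}

	DEFINE_LOCK_FETCH_OP(lock_fetch_or, or, 0)

lock_set_mustq() would then just be lock_fetch_or(lock, _Q_MUST_Q_VAL).
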
> > +
> >  static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
> >  {
> >     int cpu = get_tail_cpu(val);
> > @@ -165,6 +188,9 @@ static inline bool try_to_steal_lock(struct qspinlock *lock)
> >     for (;;) {
> >             u32 val = READ_ONCE(lock->val);
> >  
> > +           if (val & _Q_MUST_Q_VAL)
> > +                   break;
> > +
> >             if (unlikely(!(val & _Q_LOCKED_VAL))) {
> >                     if (trylock_with_tail_cpu(lock, val))
> >                             return true;
> > @@ -246,11 +272,22 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
> >             /* We must be the owner, just set the lock bit and acquire */
> >             lock_set_locked(lock);
> >     } else {
> > +           int iters = 0;
> > +           bool set_mustq = false;
> > +
> >  again:
> >             /* We're at the head of the waitqueue, wait for the lock. */
> > -           while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL)
> > +           while ((val = READ_ONCE(lock->val)) & _Q_LOCKED_VAL) {
> >                     cpu_relax();
> >  
> > +                   iters++;
>
> It seems instead of using set_mustq, (val & _Q_MUST_Q_VAL) could be checked?

I wanted to give the reader (and compiler for what that's worth) the
idea that it won't change concurrently after we set it.
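
i.e. the loop body can either re-derive it from the lock word each time
around, or keep the local flag. Roughly (a sketch, not the exact hunk):

		iters++;
		if (!(val & _Q_MUST_Q_VAL) && iters >= get_head_spins())
			lock_set_mustq(lock);

versus:

		iters++;
		if (!set_mustq && iters >= get_head_spins()) {
			set_mustq = true;
			lock_set_mustq(lock);
		}

With the flag the reader doesn't have to work out that nothing can
clear the bit once the head has set it.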

Thanks,
Nick
