On Thu Nov 10, 2022 at 10:43 AM AEST, Jordan Niethe wrote:
> On Thu, 2022-07-28 at 16:31 +1000, Nicholas Piggin wrote:
> [resend as utf-8, not utf-7]
> > This gives trylock slightly more strength, and it also gives most
> > of the benefit of passing 'val' back through the slowpath without
> > the complexity.
> > ---
> >  arch/powerpc/include/asm/qspinlock.h | 39 +++++++++++++++++++++++++++-
> >  arch/powerpc/lib/qspinlock.c         |  9 +++++++
> >  2 files changed, 47 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
> > index 44601b261e08..d3d2039237b2 100644
> > --- a/arch/powerpc/include/asm/qspinlock.h
> > +++ b/arch/powerpc/include/asm/qspinlock.h
> > @@ -5,6 +5,8 @@
> >  #include <linux/compiler.h>
> >  #include <asm/qspinlock_types.h>
> >  
> > +#define _Q_SPIN_TRY_LOCK_STEAL 1
>
> Would this be a config option?

I think probably not, it's more to keep the other code variant around in case
we want to experiment with tuning it. We might end up cutting out a bunch of
these options once we narrow down on a good configuration.
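
The selection between the two variants is just a compile-time switch on that
constant, something along these lines (sketch only, not quoting the patch
verbatim):

static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
        if (!_Q_SPIN_TRY_LOCK_STEAL)
                return __queued_spin_trylock_nosteal(lock);
        else
                return __queued_spin_trylock_steal(lock);
}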

>
> > +
> >  static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
> >  {
> >     return READ_ONCE(lock->val);
> > @@ -26,11 +28,12 @@ static __always_inline u32 queued_spin_get_locked_val(void)
> >     return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
> >  }
> >  
> > -static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> > +static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
> >  {
> >     u32 new = queued_spin_get_locked_val();
> >     u32 prev;
> >  
> > +   /* Trylock succeeds only when unlocked and no queued nodes */
> >     asm volatile(
> >  "1:        lwarx   %0,0,%1,%3      # queued_spin_trylock                   
> > \n"
>
> s/queued_spin_trylock/__queued_spin_trylock_nosteal

I wanted to keep those because these functions (can) get inlined into the
wider kernel, so you'd rather see queued_spin_trylock there than this
internal name.

> >  "  cmpwi   0,%0,0                                                  \n"
> > @@ -49,6 +52,40 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
> >     return 0;
> >  }
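
At the C level this variant boils down to a single acquire cmpxchg of
0 -> locked|owner. A rough sketch with generic kernel atomics (hypothetical
name, not the patch's actual code):

static __always_inline int trylock_nosteal_sketch(struct qspinlock *lock)
{
        /* succeeds only when the whole word is 0: unlocked and no queued nodes */
        return cmpxchg_acquire(&lock->val, 0, queued_spin_get_locked_val()) == 0;
}
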
> >  
> > +static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
> > +{
> > +   u32 new = queued_spin_get_locked_val();
> > +   u32 prev, tmp;
> > +
> > +   /* Trylock may get ahead of queued nodes if it finds unlocked */
> > +   asm volatile(
> > +"1:        lwarx   %0,0,%2,%5      # queued_spin_trylock                   
> > \n"
>
> s/queued_spin_trylock/__queued_spin_trylock_steal
>
> > +"  andc.   %1,%0,%4                                                \n"
> > +"  bne-    2f                                                      \n"
> > +"  and     %1,%0,%4                                                \n"
> > +"  or      %1,%1,%3                                                \n"
> > +"  stwcx.  %1,0,%2                                                 \n"
> > +"  bne-    1b                                                      \n"
> > +"\t"       PPC_ACQUIRE_BARRIER "                                           
> > \n"
> > +"2:                                                                        
> > \n"
>
> Just because there's a little bit more going on here...
>
> _Q_TAIL_CPU_MASK = 0xFFFE0000
> ~_Q_TAIL_CPU_MASK = 0x1FFFF
>
>
> 1:    lwarx   prev, 0, &lock->val, IS_ENABLED_PPC64
>       andc.   tmp, prev, _Q_TAIL_CPU_MASK     (tmp = prev & ~_Q_TAIL_CPU_MASK)
>       bne-    2f                              (exit if locked)
>       and     tmp, prev, _Q_TAIL_CPU_MASK     (tmp = prev & _Q_TAIL_CPU_MASK)
>       or      tmp, tmp, new                   (tmp |= new)
>       stwcx.  tmp, 0, &lock->val
>       bne-    1b
>       PPC_ACQUIRE_BARRIER
> 2:
>
> ... which seems correct.
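
Yes, that's the idea. At the C level it's roughly the sketch below (a plain
cmpxchg rather than the larx/stcx. loop, so it just fails where the asm would
retry a failed stwcx.; hypothetical name, not the patch's actual code):

static __always_inline int trylock_steal_sketch(struct qspinlock *lock)
{
        u32 new = queued_spin_get_locked_val();
        u32 prev = READ_ONCE(lock->val);

        if (prev & ~_Q_TAIL_CPU_MASK)   /* locked: fail */
                return 0;

        /* keep the queued tail, set locked | owner CPU */
        return cmpxchg_acquire(&lock->val, prev,
                               (prev & _Q_TAIL_CPU_MASK) | new) == prev;
}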

Thanks,
Nick
