> > > > > *sl);  static
> > > > > inline void  rte_spinlock_lock(rte_spinlock_t *sl)  {
> > > > > -     while (__sync_lock_test_and_set(&sl->locked, 1))
> > > > > -             while(sl->locked)
> > > > > +     int exp = 0;
> > > > > +
> > > > > +     while (!__atomic_compare_exchange_n(&sl->locked, &exp,
> > > > > 1, 0,
> > > > > +                             __ATOMIC_ACQUIRE,
> > > > > __ATOMIC_RELAXED))
> > > > {
> > > >
> > > > How about remove explict exp = 0 and change to
> > > > __atomic_test_and_set(flag, __ATOMIC_ACQUIRE);
> > >
> > > Yes, __atomic_test_and_set means simpler code and better, but
> > > __atomic_test_and_set takes the first argument as a pointer to type
> > > bool or
> > > char, in our case, sl->locked is of type uint32.
> > > We can force it to uint8, or just pass in the 32bit pointer, only
> > > one byte/bit is
> > > really used in this case, is that ok?
> > >
> > > "It should be only used for operands of type bool or char. For
> > > other types only
> > > part of the value may be set."
> > > https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/_005f_005fatomic-
> > > Builtins.html
> > >
> > > From performance perspective, in our testing, the performance was
> > > very close,
> > > compared to __atomic.
> > If performance is close, I suggest we go with the existing patch.
> > Changing sl->locked to bool/char would be an ABI change and will
> > affect x86 TM based implementation as well.
> > Jerin, what do you think?
> 
> Looks good to me.
> 
I tested __atomic_test_and_test based patch(I did not push this, it is in 
internal review), it caused 10 times performance degradation on ThunderX2.
In the internal review, Ola Liljedahl's comment well explained the degradation:
"Unlike the old code, the new code will write the lock location (and thus get 
exclusive access to the whole cache line) repeatedly while it is busy waiting. 
This is bad for the lock owner if it is accessing other data located in the 
same cache line as the lock (which is often the case)."
The real difference is __atomic_test_and_test keeps writing the lock 
location(whether it is locked or not) and monopolizing the cache line, it is 
bad to the lock owner and other contenders. 

Reply via email to