On Fri, Feb 07, 2014 at 01:28:37PM +0100, Peter Zijlstra wrote:
> Anyway, you can do a version with lwarx/stwcx if you're looking to get rid
> of lharx.
the below seems to compile into relatively ok asm. It can be done better if you write the entire thing by hand though. --- typedef unsigned short ticket_t; typedef struct { union { unsigned int pair; struct { /* ensure @head is the MSB */ #ifdef __BIG_ENDIAN__ ticket_t head,tail; #else ticket_t tail,head; #endif }; }; } tickets_t; #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) #define barrier() __asm__ __volatile__ ("" : : :"memory") #define __lwsync() __asm__ __volatile__ ("lwsync" : : :"memory") #define smp_store_release(p, v) \ do { \ __lwsync(); \ ACCESS_ONCE(*p) = (v); \ } while (0) #define smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ __lwsync(); \ ___p1; \ }) #define likely(x) __builtin_expect(!!(x), 1) #define cpu_relax() barrier(); static inline unsigned int xadd(unsigned int *v, unsigned int i) { int t, ret; __asm__ __volatile__ ( "1: lwarx %0, 0, %4\n" " mr %1, %0\n" " add %0, %3, %0\n" " stwcx. %0, %0, %4\n" " bne- 1b\n" : "=&r" (t), "=&r" (ret), "+m" (*v) : "r" (i), "r" (v) : "cc"); return ret; } void ticket_lock(tickets_t *lock) { tickets_t t; /* * Because @head is MSB, the direct increment wrap doesn't disturb * @tail. */ t.pair = xadd(&lock->pair, 1<<16); if (likely(t.head == t.tail)) { __lwsync(); /* acquire */ return; } while (smp_load_acquire(&lock->tail) != t.tail) cpu_relax(); } void ticket_unlock(tickets_t *lock) { ticket_t tail = lock->tail + 1; /* * The store is save against the xadd for it will make the ll/sc fail * and try again. Aside from that PowerISA guarantees single-copy * atomicy for half-word writes. * * And since only the lock owner will ever write the tail, we're good. */ smp_store_release(&lock->tail, tail); } _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev