On Fri, Feb 07, 2014 at 01:28:37PM +0100, Peter Zijlstra wrote:
> Anyway, you can do a version with lwarx/stwcx if you're looking to get
> rid of lharx.

the below seems to compile into relatively ok asm. It can be done better
if you write the entire thing by hand though.

---

typedef unsigned short ticket_t;

/*
 * Ticket-lock word: @pair allows the whole lock to be read/modified as one
 * 32-bit quantity, while @head/@tail remain individually addressable
 * half-words (PowerISA guarantees single-copy atomicity for both sizes).
 *
 * @head is the next ticket to hand out; @tail is the ticket currently
 * being served.  The lock is free when head == tail.
 */
typedef struct {
        union {
                unsigned int pair;
                struct {
                        /* ensure @head is the MSB */
#ifdef __BIG_ENDIAN__
                        ticket_t head,tail;
#else
                        ticket_t tail,head;
#endif
                };
        };
} tickets_t;

/* Force a single, non-cached access to @x; prevents compiler re-reads/tears. */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/* Compiler-only barrier: no instruction emitted, just blocks reordering. */
#define barrier()       __asm__ __volatile__ ("" : : :"memory")
/* PowerPC lightweight sync: orders all accesses except store->load. */
#define __lwsync()      __asm__ __volatile__ ("lwsync" : : :"memory")

/*
 * Release store: the lwsync orders all prior loads/stores before the
 * store to *p, so a subsequent acquire-load of *p observes them.
 */
#define smp_store_release(p, v)                                         \
do {                                                                    \
        __lwsync();                                                     \
        ACCESS_ONCE(*p) = (v);                                          \
} while (0)

/*
 * Acquire load: the lwsync after the load of *p orders it before all
 * subsequent loads/stores, pairing with smp_store_release().
 */
#define smp_load_acquire(p)                                             \
({                                                                      \
        typeof(*p) ___p1 = ACCESS_ONCE(*p);                             \
        __lwsync();                                                     \
        ___p1;                                                          \
})

/* Branch-prediction hint: tell the compiler @x is expected to be true. */
#define likely(x)       __builtin_expect(!!(x), 1)

/*
 * Busy-wait pacifier; on this sketch just a compiler barrier so the
 * spin loop re-reads memory each iteration.
 *
 * No trailing semicolon in the expansion: the caller writes
 * "cpu_relax();" and the macro must behave like a normal statement
 * (a trailing ';' here would break "if (x) cpu_relax(); else ...").
 */
#define cpu_relax()     barrier()

/*
 * Atomically add @i to *@v and return the PREVIOUS value (fetch-and-add),
 * implemented with a lwarx/stwcx. load-reserve / store-conditional loop.
 *
 * No ordering implied: callers must add their own acquire/release
 * barriers (as ticket_lock()/ticket_unlock() do).
 */
static inline unsigned int xadd(unsigned int *v, unsigned int i)
{
        unsigned int t, ret;

        __asm__ __volatile__ (
"1:     lwarx   %0, 0, %4\n"    /* t = *v, with reservation */
"       mr      %1, %0\n"       /* ret = old value */
"       add     %0, %3, %0\n"   /* t += i */
"       stwcx.  %0, 0, %4\n"    /* conditional store to *v (base reg 0 + %4);
                                 * was "%0, %0, %4", which stored to the
                                 * bogus address t + v */
"       bne-    1b\n"           /* reservation lost: retry */
        : "=&r" (t), "=&r" (ret), "+m" (*v)
        : "r" (i), "r" (v)
        : "cc");

        return ret;
}

/*
 * Take the lock: grab a ticket (the pre-increment @head value) and wait
 * until @tail reaches it.
 */
void ticket_lock(tickets_t *lock)
{
        tickets_t t;

        /*
         * Because @head is MSB, the direct increment wrap doesn't disturb
         * @tail.
         */
        t.pair = xadd(&lock->pair, 1<<16);

        /* Fast path: lock was free, our ticket is already being served. */
        if (likely(t.head == t.tail)) {
                __lwsync(); /* acquire */
                return;
        }

        /*
         * Slow path: spin until the owner's release store to @tail hands
         * us our turn; the acquire load orders the critical section after
         * the previous owner's stores.
         */
        while (smp_load_acquire(&lock->tail) != t.tail)
                cpu_relax();
}

/*
 * Release the lock: advance @tail so the next ticket holder may enter.
 * Must only be called by the current lock owner.
 */
void ticket_unlock(tickets_t *lock)
{
        ticket_t tail = lock->tail + 1;

        /*
         * The store is safe against the xadd for it will make the ll/sc fail
         * and try again. Aside from that PowerISA guarantees single-copy
         * atomicity for half-word writes.
         *
         * And since only the lock owner will ever write the tail, we're good.
         */
        smp_store_release(&lock->tail, tail);
}
_______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to