Hi,

On 2014-06-26 23:01:10 +0200, Andres Freund wrote:
> I think we should rework things so that we fall back to
> pg_write_barrier(); (*((volatile slock_t *) (lock)) = 0) instead of what
> we have right now.
>
> That'd require making barrier.h independent of s_lock.h, but I think
> that'd be a pretty clear improvement. One open question is what happens
> with SpinlockRelease() when barriers are implemented using spinlocks
> and SpinlockRelease() itself needs a barrier.
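For concreteness, a minimal sketch of that proposed fallback. The stand-in
definitions of slock_t and pg_write_barrier() below are simplified
placeholders (the real tree defines both per platform) and assume a
GCC-style compiler on a strongly ordered architecture:

/* Simplified stand-ins; the tree defines these per platform. */
typedef unsigned char slock_t;

/*
 * On x86 a compiler barrier suffices as a write barrier; weakly ordered
 * architectures would need a real fence instruction here.
 */
#define pg_write_barrier()	__asm__ __volatile__ ("" ::: "memory")

/* The proposed generic unlock: write barrier, then the release store. */
#define S_UNLOCK(lock) \
	do \
	{ \
		pg_write_barrier(); \
		*((volatile slock_t *) (lock)) = 0; \
	} while (0)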
Too tired to think about this any further today. Here's my current state
of this:

* barrier.h's spinlock implementation moved to s_lock.c, losing the
  s_lock.h include. That requires an S_UNLOCK definition which doesn't
  itself use a barrier again; I've coined it S_UNLOCKED_UNLOCK.
* s_lock.h now includes barrier.h to implement the generic S_UNLOCK
  safely.
* gcc x86-64 had a superfluous "cc" clobber, likely copied from the
  32-bit version, which does additional operations.
* PPC was missing a compiler barrier.
* alpha was missing a "cc" clobber.
* mips was missing a compiler barrier and a "cc" clobber.
* I have no idea how to fix pa-risc's S_UNLOCK for !gcc. The referenced
  spinlock paper calls an external function to avoid reordering.

Greetings,

Andres Freund

-- 
 Andres Freund	                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
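To illustrate the class of bug those missing compiler barriers permit,
here is a hypothetical example (counter_lock, protected_counter and
bump_counter() are made up for illustration) of what the clobber fixes in
the patch below guard against:

#include "storage/s_lock.h"

static slock_t counter_lock;
static int	protected_counter;

void
bump_counter(void)
{
	S_LOCK(&counter_lock);

	/*
	 * Without a "memory" clobber (or an equivalent compiler barrier) in
	 * S_UNLOCK, the compiler may legally sink this store below the
	 * release store, i.e. other backends could observe the lock as free
	 * before the protected update is visible.
	 */
	protected_counter++;

	S_UNLOCK(&counter_lock);
}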
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index efe1b43..5052718 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -187,6 +187,23 @@ update_spins_per_delay(int shared_spins_per_delay)
 	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
 }
 
+/*
+ * Memory barrier implementation based on lock/unlock to a spinlock. Check
+ * barrier.h for details.
+ */
+#ifdef PG_SIMULATE_MEMORY_BARRIER
+void
+pg_memory_barrier_impl(void)
+{
+	S_LOCK(&dummy_spinlock);
+#ifdef S_UNLOCKED_UNLOCK
+	S_UNLOCKED_UNLOCK(&dummy_spinlock);
+#else
+	S_UNLOCK(&dummy_spinlock);
+#endif
+}
+#endif /* PG_SIMULATE_MEMORY_BARRIER */
+
 
 /*
  * Various TAS implementations that cannot live in s_lock.h as no inline
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index 9b71744..03a4fe8 100644
--- a/src/backend/storage/lmgr/spin.c
+++ b/src/backend/storage/lmgr/spin.c
@@ -25,6 +25,7 @@
 #include "access/xlog.h"
 #include "miscadmin.h"
 #include "replication/walsender.h"
+#include "storage/barrier.h"
 #include "storage/lwlock.h"
 #include "storage/pg_sema.h"
 #include "storage/spin.h"
diff --git a/src/include/storage/barrier.h b/src/include/storage/barrier.h
index bc61de0..e5692ac 100644
--- a/src/include/storage/barrier.h
+++ b/src/include/storage/barrier.h
@@ -13,10 +13,6 @@
 #ifndef BARRIER_H
 #define BARRIER_H
 
-#include "storage/s_lock.h"
-
-extern slock_t dummy_spinlock;
-
 /*
  * A compiler barrier need not (and preferably should not) emit any actual
  * machine code, but must act as an optimization fence: the compiler must not
@@ -155,10 +151,14 @@ extern slock_t dummy_spinlock;
  * spinlock acquire-and-release would be equivalent to a full memory barrier.
  * For example, I'm not sure that Itanium's acq and rel add up to a full
  * fence.  But all of our actual implementations seem OK in this regard.
+ *
+ * The actual implementation is in s_lock.c so we can include barrier.h from
+ * s_lock.h. Yes, this will make things even slower, but who cares.
  */
 #if !defined(pg_memory_barrier)
-#define pg_memory_barrier() \
-	do { S_LOCK(&dummy_spinlock); S_UNLOCK(&dummy_spinlock); } while (0)
+#define PG_SIMULATE_MEMORY_BARRIER
+extern void pg_memory_barrier_impl(void);
+#define pg_memory_barrier() pg_memory_barrier_impl()
 #endif
 
 /*
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index ba4dfe1..2732054 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -19,7 +19,8 @@
  *	Should return number of "delays"; see s_lock.c
  *
  *	void S_UNLOCK(slock_t *lock)
- *		Unlock a previously acquired lock.
+ *		Unlock a previously acquired lock. Needs to imply a compiler and
+ *		write memory barrier.
  *
  *	bool S_LOCK_FREE(slock_t *lock)
  *		Tests if the lock is free. Returns TRUE if free, FALSE if locked.
@@ -39,6 +40,7 @@
  *	Atomic test-and-set instruction.  Attempt to acquire the lock,
  *	but do *not* wait.	Returns 0 if successful, nonzero if unable
  *	to acquire the lock.
+ *	Needs to imply a compiler and read memory barrier.
  *
  *	int TAS_SPIN(slock_t *lock)
  *		Like TAS(), but this version is used when waiting for a lock
@@ -94,6 +96,8 @@
 #ifndef S_LOCK_H
 #define S_LOCK_H
 
+#include "storage/barrier.h"
+
 #ifdef HAVE_SPINLOCKS	/* skip spinlocks if requested */
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
@@ -224,7 +228,7 @@ tas(volatile slock_t *lock)
 		"	xchgb	%0,%1	\n"
 :		"+q"(_res), "+m"(*lock)
 :
-:		"memory", "cc");
+:		"memory");
 	return (int) _res;
 }
 
@@ -478,14 +482,14 @@ tas(volatile slock_t *lock)
 #define S_UNLOCK(lock)	\
 do \
 { \
-	__asm__ __volatile__ ("	lwsync \n"); \
+	__asm__ __volatile__ ("	lwsync \n":::"memory"); \
 	*((volatile slock_t *) (lock)) = 0; \
 } while (0)
 #else
 #define S_UNLOCK(lock)	\
 do \
 { \
-	__asm__ __volatile__ ("	sync \n"); \
+	__asm__ __volatile__ ("	sync \n":::"memory"); \
 	*((volatile slock_t *) (lock)) = 0; \
 } while (0)
 #endif /* USE_PPC_LWSYNC */
@@ -542,7 +546,7 @@ tas(volatile slock_t *lock)
 		"1: \n"
 :		"=&r"(_res), "+m"(*lock)
 :		"r"(lock)
-:		"memory");
+:		"memory", "cc");
 	return _res;
 }
 
@@ -580,14 +584,14 @@ tas(volatile slock_t *lock)
 		"3:				\n"
 :		"=&r"(_res), "+m"(*lock)
 :
-:		"memory", "0");
+:		"memory", "0", "cc");
 	return (int) _res;
 }
 
 #define S_UNLOCK(lock)	\
 do \
 {\
-	__asm__ __volatile__ ("	mb \n"); \
+	__asm__ __volatile__ ("	mb \n":::"memory"); \
 	*((volatile slock_t *) (lock)) = 0; \
 } while (0)
 
@@ -624,7 +628,7 @@ tas(volatile slock_t *lock)
 		"       .set pop              "
 :		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
 :
-:		"memory");
+:		"memory", "cc");
 	return _res;
 }
 
@@ -638,7 +642,10 @@ do \
 	"       .set noreorder        \n" \
 	"       .set nomacro          \n" \
 	"       sync                  \n" \
-	"       .set pop              "); \
+	"       .set pop              " \
+: \
+: \
+: "memory"); \
 	*((volatile slock_t *) (lock)) = 0; \
 } while (0)
 
@@ -680,7 +687,7 @@ tas(volatile slock_t *lock)
 		"	xor		#1,%0	\n"
 :		"=z"(_res), "+m"(*lock)
 :		"r"(lock)
-:		"memory", "t");
+:		"memory", "t", "cc");
 	return _res;
 }
 
@@ -786,12 +793,13 @@ tas(volatile slock_t *lock)
 		"	ldcwx	0(0,%2),%0	\n"
 :		"=r"(lockval), "+m"(*lockword)
 :		"r"(lockword)
-:		"memory");
+:		"memory", "cc");
 	return (lockval == 0);
 }
 
 #endif	 /* __GNUC__ */
 
+/* XXX: no barrier semantics */
 #define S_UNLOCK(lock)	(*TAS_ACTIVE_WORD(lock) = -1)
 
 #define S_INIT_LOCK(lock) \
@@ -827,6 +835,7 @@ tas(volatile slock_t *lock)
 typedef unsigned int slock_t;
 
 #include <ia64/sys/inline.h>
+/* xchg implies acquire semantics */
 #define TAS(lock)	_Asm_xchg(_SZ_W, lock, 1, _LDHINT_NONE)
 /* On IA64, it's a win to use a non-locking test before the xchg proper */
 #define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
@@ -942,7 +951,17 @@ extern int	tas_sema(volatile slock_t *lock);
 #endif	 /* S_LOCK_FREE */
 
 #if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)		(*((volatile slock_t *) (lock)) = 0)
+#define S_UNLOCK(lock) \
+	do \
+	{ \
+		pg_write_barrier(); \
+		(*((volatile slock_t *) (lock)) = 0); \
+	} while (0)
+/*
+ * Version without implied memory barrier, only to be used in the spinlock
+ * based fallback for memory barriers.
+ */
+#define S_UNLOCKED_UNLOCK(lock) (*((volatile slock_t *) (lock)) = 0)
 #endif	 /* S_UNLOCK */
 
 #if !defined(S_INIT_LOCK)
@@ -964,6 +983,8 @@ extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
 #define TAS_SPIN(lock)	TAS(lock)
 #endif	 /* TAS_SPIN */
 
+/* spinlock used to enforce barrier semantics */
+extern slock_t dummy_spinlock;
 
 /*
  * Platform-independent out-of-line support routines
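To spell out why the fallback barrier can't simply reuse the generic
S_UNLOCK: on a platform with neither a native memory barrier nor a native
S_UNLOCK, pg_write_barrier() itself falls back to pg_memory_barrier(), so
the macros would expand into each other. A sketch of that cycle
(hypothetical expansion, not code from the patch):

/*
 *   pg_memory_barrier()
 *     -> pg_memory_barrier_impl()
 *          S_LOCK(&dummy_spinlock);
 *          S_UNLOCK(&dummy_spinlock);      -- generic version ...
 *            -> pg_write_barrier()         -- ... which falls back to ...
 *                 -> pg_memory_barrier()   -- ... recursing forever.
 *
 * Releasing the dummy spinlock with a plain store breaks the cycle:
 */
#define S_UNLOCKED_UNLOCK(lock)	(*((volatile slock_t *) (lock)) = 0)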