On Wed, Jan 17, 2018 at 12:36:31AM -0800, Noah Misch wrote:
> On Tue, Jan 16, 2018 at 08:50:24AM -0800, Andres Freund wrote:
> > On 2018-01-16 16:12:11 +0900, Michael Paquier wrote:
> > > On Fri, Feb 03, 2017 at 12:26:50AM +0000, Noah Misch wrote:
> > > > Since this emits double syncs with older xlc, I recommend instead
> > > > replacing the whole thing with inline asm.  As I opined in the last
> > > > message of the thread you linked above, the intrinsics provide little
> > > > value as abstractions if one checks the generated code to deduce how
> > > > to use them.  Now that the generated code is xlc-version-dependent,
> > > > the port is better off with compiler-independent asm like we have for
> > > > ppc in s_lock.h.
> > >
> > > Could it be cleaner to just use __xlc_ver__ to avoid double syncs on
> > > past versions?  I think that it would make the code more understandable
> > > than just listing the instructions directly.
> >
> > Given the quality of the intrinsics on AIX, see past commits and the
> > comment in the code quoted above, I think we're much better off doing
> > this via inline asm.
>
> For me, verifiability is the crucial benefit of inline asm.  Anyone with
> an architecture manual can thoroughly review an inline asm implementation.
> Given intrinsics and __xlc_ver__ conditionals, the same level of review
> requires access to every xlc version.
> > > As Heikki is not around these days, Noah, could you provide a new
> > > version of the patch?  This bug has been around for some time now, it
> > > would be nice to move on..
>
> Not soon.

Done.  fetch-add-variable-test-v1.patch just adds tests for non-constant
addends and 16-bit edge cases.  Today's implementation handles those, but
PostgreSQL doesn't use them, so I might easily have broken them.
fetch-add-xlc-asm-v1.patch moves xlc builds from the __fetch_and_add()
intrinsic to inline asm.  fetch-add-gcc-xlc-unify-v1.patch moves fetch_add
to inline asm for all other ppc compilers; gcc-7.2.0 generates equivalent
code before and after.  I plan to keep the third patch HEAD-only and
back-patch the other two.  I tested with xlc v12 and v13.
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 7f03b7e..b00b76f 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -687,7 +687,7 @@ test_atomic_uint32(void)
 	if (pg_atomic_read_u32(&var) != 3)
 		elog(ERROR, "atomic_read_u32() #2 wrong");
 
-	if (pg_atomic_fetch_add_u32(&var, 1) != 3)
+	if (pg_atomic_fetch_add_u32(&var, pg_atomic_read_u32(&var) - 2) != 3)
 		elog(ERROR, "atomic_fetch_add_u32() #1 wrong");
 
 	if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
@@ -712,6 +712,20 @@ test_atomic_uint32(void)
 	if (pg_atomic_fetch_add_u32(&var, INT_MAX) != INT_MAX)
 		elog(ERROR, "pg_atomic_add_fetch_u32() #3 wrong");
 
+	pg_atomic_fetch_add_u32(&var, 2);	/* wrap to 0 */
+
+	if (pg_atomic_fetch_add_u32(&var, PG_INT16_MAX) != 0)
+		elog(ERROR, "pg_atomic_fetch_add_u32() #3 wrong");
+
+	if (pg_atomic_fetch_add_u32(&var, PG_INT16_MAX + 1) != PG_INT16_MAX)
+		elog(ERROR, "pg_atomic_fetch_add_u32() #4 wrong");
+
+	if (pg_atomic_fetch_add_u32(&var, PG_INT16_MIN) != 2 * PG_INT16_MAX + 1)
+		elog(ERROR, "pg_atomic_fetch_add_u32() #5 wrong");
+
+	if (pg_atomic_fetch_add_u32(&var, PG_INT16_MIN - 1) != PG_INT16_MAX)
+		elog(ERROR, "pg_atomic_fetch_add_u32() #6 wrong");
+
 	pg_atomic_fetch_add_u32(&var, 1);	/* top up to UINT_MAX */
 
 	if (pg_atomic_read_u32(&var) != UINT_MAX)
@@ -787,7 +801,7 @@ test_atomic_uint64(void)
 	if (pg_atomic_read_u64(&var) != 3)
 		elog(ERROR, "atomic_read_u64() #2 wrong");
 
-	if (pg_atomic_fetch_add_u64(&var, 1) != 3)
+	if (pg_atomic_fetch_add_u64(&var, pg_atomic_read_u64(&var) - 2) != 3)
 		elog(ERROR, "atomic_fetch_add_u64() #1 wrong");
 
 	if (pg_atomic_fetch_sub_u64(&var, 1) != 4)
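For anyone double-checking the new edge-case arithmetic by hand: the
expected values are plain modular uint32 arithmetic, and the
PG_INT16_MIN/MAX boundaries matter because the third patch routes constant
addends in that range through "addi", whose immediate operand is a signed
16-bit field.  A standalone sanity check (model_fetch_add is my stand-in
here, not the pg_atomic API):

#include <assert.h>
#include <stdint.h>

/* Toy model of pg_atomic_fetch_add_u32: returns the old value. */
static uint32_t
model_fetch_add(uint32_t *v, int32_t add)
{
	uint32_t	old = *v;

	*v = old + (uint32_t) add;	/* uint32 addition wraps modulo 2^32 */
	return old;
}

int
main(void)
{
	uint32_t	var = 0;		/* state just after the "wrap to 0" step */

	assert(model_fetch_add(&var, INT16_MAX) == 0);
	assert(model_fetch_add(&var, INT16_MAX + 1) == INT16_MAX);
	assert(model_fetch_add(&var, INT16_MIN) == 2 * INT16_MAX + 1);
	assert(model_fetch_add(&var, INT16_MIN - 1) == INT16_MAX);
	/* var is now UINT32_MAX - 1, ready for the "top up to UINT_MAX" step */
	assert(var == UINT32_MAX - 1);
	return 0;
}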
diff --git a/src/include/port/atomics/generic-xlc.h b/src/include/port/atomics/generic-xlc.h
index 5ddccff..8b5c732 100644
--- a/src/include/port/atomics/generic-xlc.h
+++ b/src/include/port/atomics/generic-xlc.h
@@ -73,11 +73,27 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 static inline uint32
 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 {
+	uint32 _t;
+	uint32 res;
+
 	/*
-	 * __fetch_and_add() emits a leading "sync" and trailing "isync", thereby
-	 * providing sequential consistency.  This is undocumented.
+	 * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
+	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
+	 * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
+	 * would add redundant syncs on xlc 12.
 	 */
-	return __fetch_and_add((volatile int *)&ptr->value, add_);
+	__asm__ __volatile__(
+		"	sync				\n"
+		"	lwarx   %1,0,%4		\n"
+		"	add     %0,%1,%3	\n"
+		"	stwcx.  %0,0,%4		\n"
+		"	bne     $-12		\n"		/* branch to lwarx */
+		"	isync				\n"
+:		"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:		"r"(add_), "r"(&ptr->value)
+:		"memory", "cc");
+
+	return res;
 }
 
 #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
@@ -103,7 +119,22 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 static inline uint64
 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 {
-	return __fetch_and_addlp((volatile long *)&ptr->value, add_);
+	uint64 _t;
+	uint64 res;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+	__asm__ __volatile__(
+		"	sync				\n"
+		"	ldarx   %1,0,%4		\n"
+		"	add     %0,%1,%3	\n"
+		"	stdcx.  %0,0,%4		\n"
+		"	bne     $-12		\n"		/* branch to ldarx */
+		"	isync				\n"
+:		"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:		"r"(add_), "r"(&ptr->value)
+:		"memory", "cc");
+
+	return res;
 }
 
 #endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
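To make the asm reviewable against the architecture manual, here is a
rough C model of what the sequence implements.  Illustrative only: a
compare-and-swap loop is not equivalent to an lwarx/stwcx. reservation,
which is part of why the real code is asm; this just maps each instruction
to its role, using GCC __atomic builtins.

#include <stdint.h>

/*
 * Role of each instruction in the asm above:
 *   sync     full barrier before the operation
 *   lwarx    load word and reserve
 *   add      compute old + add_
 *   stwcx.   store iff the reservation still holds; sets CR0
 *   bne      retry from lwarx if the store-conditional failed
 *   isync    keep later instructions from starting before the op completes
 * Together, the leading sync and trailing isync provide the sequentially
 * consistent behavior the old intrinsic comment described.
 */
static inline uint32_t
fetch_add_u32_model(volatile uint32_t *ptr, int32_t add_)
{
	uint32_t	old;

	__atomic_thread_fence(__ATOMIC_SEQ_CST);	/* "sync" */
	old = *ptr;									/* first "lwarx" */
	while (!__atomic_compare_exchange_n(ptr, &old, old + (uint32_t) add_,
										false, __ATOMIC_RELAXED,
										__ATOMIC_RELAXED))
		;		/* "bne": old has been refreshed; recompute and retry */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);	/* "isync" */
	return old;
}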
diff --git a/configure b/configure
index 7a6bfc2..d6ecb33 100755
--- a/configure
+++ b/configure
@@ -14650,6 +14650,46 @@ $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
 
 $as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
 
 fi
+  # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
+$as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
+if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static inline int
+	 addi(int ra, int si)
+	 {
+		 int res = 0;
+		 if (__builtin_constant_p(si))
+			 __asm__ __volatile__(
+				 " addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(si));
+		 return res;
+	 }
+	 int test_adds(int x) { return addi(3, x) + addi(x, 5); }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_i_constraint__builtin_constant_p=yes
+else
+  pgac_cv_have_i_constraint__builtin_constant_p=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_i_constraint__builtin_constant_p" >&5
+$as_echo "$pgac_cv_have_i_constraint__builtin_constant_p" >&6; }
+  if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+
+$as_echo "#define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P 1" >>confdefs.h
+
+  fi
 ;;
 esac
diff --git a/configure.in b/configure.in
index dde3eec..510bd76 100644
--- a/configure.in
+++ b/configure.in
@@ -1541,6 +1541,26 @@ case $host_cpu in
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
 	AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
+                   [pgac_cv_have_i_constraint__builtin_constant_p],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+      [static inline int
+       addi(int ra, int si)
+       {
+           int res = 0;
+           if (__builtin_constant_p(si))
+               __asm__ __volatile__(
+                   " addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(si));
+           return res;
+       }
+       int test_adds(int x) { return addi(3, x) + addi(x, 5); }], [])],
+      [pgac_cv_have_i_constraint__builtin_constant_p=yes],
+      [pgac_cv_have_i_constraint__builtin_constant_p=no])])
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+      AC_DEFINE(HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, 1,
+                [Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance.])
+    fi
 ;;
 esac
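To spell out what this probe screens for: the "i" constraint demands an
assemble-time constant, so "i"(si) is legal only in copies of addi() where
si has folded to a literal.  test_adds() deliberately passes both a
constant and a non-constant addend; a compiler passes the probe only if it
discards the guarded asm in the non-constant copy rather than rejecting its
unsatisfiable constraint.  Roughly, a passing compiler reduces test_adds()
to the following two shapes (function names are mine, for illustration):

/* From addi(x, 5): the guard is true and "i"(5) is a genuine immediate. */
static int
addi_constant_arm(int ra)
{
	int			res = 0;

	__asm__ __volatile__(" addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(5));
	return res;
}

/*
 * From addi(3, x): the guard is false, so the asm and its impossible
 * "i"(x) operand must be discarded entirely.
 */
static int
addi_variable_arm(void)
{
	return 0;
}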
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 512213a..9be84fc 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -332,6 +332,9 @@
 /* Define to 1 if you have isinf(). */
 #undef HAVE_ISINF
 
+/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
+#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H
 
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
index 344b394..35d602e 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -25,5 +25,103 @@
 #define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
 #endif
 
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#ifdef __64BIT__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* __64BIT__ */
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32 _t;
+	uint32 res;
+
+	/*
+	 * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
+	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
+	 * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
+	 * would add redundant syncs on xlc 12.
+	 */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to lwarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to lwarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64 _t;
+	uint64 res;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to ldarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to ldarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 /* per architecture manual doubleword accesses have single copy atomicity */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/generic-xlc.h b/src/include/port/atomics/generic-xlc.h
index 8b5c732..8330b45 100644
--- a/src/include/port/atomics/generic-xlc.h
+++ b/src/include/port/atomics/generic-xlc.h
@@ -18,23 +18,6 @@
 
 #if defined(HAVE_ATOMICS)
 
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-	volatile uint32 value;
-} pg_atomic_uint32;
-
-
-/* 64bit atomics are only supported in 64bit mode */
-#ifdef __64BIT__
-#define PG_HAVE_ATOMIC_U64_SUPPORT
-typedef struct pg_atomic_uint64
-{
-	volatile uint64 value pg_attribute_aligned(8);
-} pg_atomic_uint64;
-
-#endif /* __64BIT__ */
-
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
 static inline bool
 pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
@@ -69,33 +52,6 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 	return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-static inline uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-	uint32 _t;
-	uint32 res;
-
-	/*
-	 * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
-	 * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
-	 * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
-	 * would add redundant syncs on xlc 12.
-	 */
-	__asm__ __volatile__(
-		"	sync				\n"
-		"	lwarx   %1,0,%4		\n"
-		"	add     %0,%1,%3	\n"
-		"	stwcx.  %0,0,%4		\n"
-		"	bne     $-12		\n"		/* branch to lwarx */
-		"	isync				\n"
-:		"=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:		"r"(add_), "r"(&ptr->value)
-:		"memory", "cc");
-
-	return res;
-}
-
 #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
@@ -115,28 +71,6 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 	return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U64
-static inline uint64
-pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
-{
-	uint64 _t;
-	uint64 res;
-
-	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
-	__asm__ __volatile__(
-		"	sync				\n"
-		"	ldarx   %1,0,%4		\n"
-		"	add     %0,%1,%3	\n"
-		"	stdcx.  %0,0,%4		\n"
-		"	bne     $-12		\n"		/* branch to ldarx */
-		"	isync				\n"
-:		"=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:		"r"(add_), "r"(&ptr->value)
-:		"memory", "cc");
-
-	return res;
-}
-
 #endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
 
 #endif /* defined(HAVE_ATOMICS) */
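As a usage-level illustration of what the unified code buys (hypothetical
caller; counter and bump are my names, not part of the patch): with
HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P defined, a literal addend in
[PG_INT16_MIN, PG_INT16_MAX] folds into the addi immediate, while anything
else takes the register-based add path.

#include "postgres.h"
#include "port/atomics.h"

static pg_atomic_uint32 counter;

void
bump(int32 n)
{
	/* Constant 1 fits addi's signed 16-bit immediate: "addi" variant. */
	pg_atomic_fetch_add_u32(&counter, 1);

	/* A runtime value can never satisfy "i": always the "add" variant. */
	pg_atomic_fetch_add_u32(&counter, n);
}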