On Wed, Jan 17, 2018 at 12:36:31AM -0800, Noah Misch wrote:
> On Tue, Jan 16, 2018 at 08:50:24AM -0800, Andres Freund wrote:
> > On 2018-01-16 16:12:11 +0900, Michael Paquier wrote:
> > > On Fri, Feb 03, 2017 at 12:26:50AM +0000, Noah Misch wrote:
> > > > Since this emits double syncs with older xlc, I recommend instead
> > > > replacing the whole thing with inline asm.  As I opined in the last
> > > > message of the thread you linked above, the intrinsics provide little
> > > > value as abstractions if one checks the generated code to deduce how
> > > > to use them.  Now that the generated code is xlc-version-dependent,
> > > > the port is better off with compiler-independent asm like we have for
> > > > ppc in s_lock.h.
> > > 
> > > Could it be cleaner to just use __xlc_ver__ to avoid double syncs on
> > > past versions? I think that it would make the code more understandable
> > > than just listing directly the instructions.
> > 
> > Given the quality of the intrinsics on AIX (see past commits and the
> > comment in the code quoted above), I think we're much better off doing
> > this via inline asm.
> 
> For me, verifiability is the crucial benefit of inline asm.  Anyone with an
> architecture manual can thoroughly review an inline asm implementation.  Given
> intrinsics and __xlc_ver__ conditionals, the same level of review requires
> access to every xlc version.

> > > As Heikki is not around these days, Noah, could you provide a new
> > > version of the patch? This bug has been around for some time now, it
> > > would be nice to move on.. 
> 
> Not soon.

Done.  fetch-add-variable-test-v1.patch just adds tests for non-constant
addends and 16-bit edge cases.  Today's implementation handles those cases,
but PostgreSQL itself doesn't use them, so I could easily have broken them
without noticing.
fetch-add-xlc-asm-v1.patch moves xlc builds from the __fetch_and_add()
intrinsic to inline asm.  fetch-add-gcc-xlc-unify-v1.patch moves fetch_add to
inline asm for all other ppc compilers.  gcc-7.2.0 generates equivalent code
before and after.  I plan to keep the third patch HEAD-only, back-patching the
other two.  I tested with xlc v12 and v13.
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 7f03b7e..b00b76f 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -687,7 +687,7 @@ test_atomic_uint32(void)
        if (pg_atomic_read_u32(&var) != 3)
                elog(ERROR, "atomic_read_u32() #2 wrong");
 
-       if (pg_atomic_fetch_add_u32(&var, 1) != 3)
+       if (pg_atomic_fetch_add_u32(&var, pg_atomic_read_u32(&var) - 2) != 3)
                elog(ERROR, "atomic_fetch_add_u32() #1 wrong");
 
        if (pg_atomic_fetch_sub_u32(&var, 1) != 4)
@@ -712,6 +712,20 @@ test_atomic_uint32(void)
        if (pg_atomic_fetch_add_u32(&var, INT_MAX) != INT_MAX)
                elog(ERROR, "pg_atomic_add_fetch_u32() #3 wrong");
 
+       pg_atomic_fetch_add_u32(&var, 2);       /* wrap to 0 */
+
+       if (pg_atomic_fetch_add_u32(&var, PG_INT16_MAX) != 0)
+               elog(ERROR, "pg_atomic_fetch_add_u32() #3 wrong");
+
+       if (pg_atomic_fetch_add_u32(&var, PG_INT16_MAX + 1) != PG_INT16_MAX)
+               elog(ERROR, "pg_atomic_fetch_add_u32() #4 wrong");
+
+       if (pg_atomic_fetch_add_u32(&var, PG_INT16_MIN) != 2 * PG_INT16_MAX + 1)
+               elog(ERROR, "pg_atomic_fetch_add_u32() #5 wrong");
+
+       if (pg_atomic_fetch_add_u32(&var, PG_INT16_MIN - 1) != PG_INT16_MAX)
+               elog(ERROR, "pg_atomic_fetch_add_u32() #6 wrong");
+
        pg_atomic_fetch_add_u32(&var, 1);       /* top up to UINT_MAX */
 
        if (pg_atomic_read_u32(&var) != UINT_MAX)
@@ -787,7 +801,7 @@ test_atomic_uint64(void)
        if (pg_atomic_read_u64(&var) != 3)
                elog(ERROR, "atomic_read_u64() #2 wrong");
 
-       if (pg_atomic_fetch_add_u64(&var, 1) != 3)
+       if (pg_atomic_fetch_add_u64(&var, pg_atomic_read_u64(&var) - 2) != 3)
                elog(ERROR, "atomic_fetch_add_u64() #1 wrong");
 
        if (pg_atomic_fetch_sub_u64(&var, 1) != 4)
diff --git a/src/include/port/atomics/generic-xlc.h b/src/include/port/atomics/generic-xlc.h
index 5ddccff..8b5c732 100644
--- a/src/include/port/atomics/generic-xlc.h
+++ b/src/include/port/atomics/generic-xlc.h
@@ -73,11 +73,27 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
 static inline uint32
 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 {
+       uint32 _t;
+       uint32 res;
+
        /*
-        * __fetch_and_add() emits a leading "sync" and trailing "isync", thereby
-        * providing sequential consistency.  This is undocumented.
+        * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
+        * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
+        * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
+        * would add redundant syncs on xlc 12.
         */
-       return __fetch_and_add((volatile int *)&ptr->value, add_);
+       __asm__ __volatile__(
+               "       sync                            \n"
+               "       lwarx   %1,0,%4         \n"
+               "       add     %0,%1,%3        \n"
+               "       stwcx.  %0,0,%4         \n"
+               "       bne     $-12            \n"             /* branch to lwarx */
+               "       isync                           \n"
+:              "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:              "r"(add_), "r"(&ptr->value)
+:              "memory", "cc");
+
+       return res;
 }
 
 #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
@@ -103,7 +119,22 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
 static inline uint64
 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 {
-       return __fetch_and_addlp((volatile long *)&ptr->value, add_);
+       uint64 _t;
+       uint64 res;
+
+       /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+       __asm__ __volatile__(
+               "       sync                            \n"
+               "       ldarx   %1,0,%4         \n"
+               "       add     %0,%1,%3        \n"
+               "       stdcx.  %0,0,%4         \n"
+               "       bne     $-12            \n"             /* branch to ldarx */
+               "       isync                           \n"
+:              "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:              "r"(add_), "r"(&ptr->value)
+:              "memory", "cc");
+
+       return res;
 }
 
 #endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
diff --git a/configure b/configure
index 7a6bfc2..d6ecb33 100755
--- a/configure
+++ b/configure
@@ -14650,6 +14650,46 @@ $as_echo "$pgac_cv_have_ppc_mutex_hint" >&6; }
 $as_echo "#define HAVE_PPC_LWARX_MUTEX_HINT 1" >>confdefs.h
 
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance" >&5
+$as_echo_n "checking whether __builtin_constant_p(x) implies \"i\"(x) acceptance... " >&6; }
+if ${pgac_cv_have_i_constraint__builtin_constant_p+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 " addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv_have_i_constraint__builtin_constant_p=yes
+else
+  pgac_cv_have_i_constraint__builtin_constant_p=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_have_i_constraint__builtin_constant_p" >&5
+$as_echo "$pgac_cv_have_i_constraint__builtin_constant_p" >&6; }
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+
+$as_echo "#define HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P 1" >>confdefs.h
+
+    fi
   ;;
 esac
 
diff --git a/configure.in b/configure.in
index dde3eec..510bd76 100644
--- a/configure.in
+++ b/configure.in
@@ -1541,6 +1541,26 @@ case $host_cpu in
     if test x"$pgac_cv_have_ppc_mutex_hint" = xyes ; then
       AC_DEFINE(HAVE_PPC_LWARX_MUTEX_HINT, 1, [Define to 1 if the assembler supports PPC's LWARX mutex hint bit.])
     fi
+    # Check if compiler accepts "i"(x) when __builtin_constant_p(x).
+    AC_CACHE_CHECK([whether __builtin_constant_p(x) implies "i"(x) acceptance],
+                   [pgac_cv_have_i_constraint__builtin_constant_p],
+    [AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+    [static inline int
+     addi(int ra, int si)
+     {
+         int res = 0;
+         if (__builtin_constant_p(si))
+             __asm__ __volatile__(
+                 " addi %0,%1,%2\n" : "=r"(res) : "r"(ra), "i"(si));
+         return res;
+     }
+     int test_adds(int x) { return addi(3, x) + addi(x, 5); }], [])],
+    [pgac_cv_have_i_constraint__builtin_constant_p=yes],
+    [pgac_cv_have_i_constraint__builtin_constant_p=no])])
+    if test x"$pgac_cv_have_i_constraint__builtin_constant_p" = xyes ; then
+      AC_DEFINE(HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P, 1,
+                [Define to 1 if __builtin_constant_p(x) implies "i"(x) 
acceptance.])
+    fi
   ;;
 esac
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 512213a..9be84fc 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -332,6 +332,9 @@
 /* Define to 1 if you have isinf(). */
 #undef HAVE_ISINF
 
+/* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. */
+#undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+
 /* Define to 1 if you have the <langinfo.h> header file. */
 #undef HAVE_LANGINFO_H
 
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
index 344b394..35d602e 100644
--- a/src/include/port/atomics/arch-ppc.h
+++ b/src/include/port/atomics/arch-ppc.h
@@ -25,5 +25,103 @@
 #define pg_write_barrier_impl()                __asm__ __volatile__ ("lwsync" : : : "memory")
 #endif
 
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+       volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#ifdef __64BIT__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+       volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* __64BIT__ */
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+       uint32 _t;
+       uint32 res;
+
+       /*
+        * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
+        * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
+        * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
+        * would add redundant syncs on xlc 12.
+        */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+       if (__builtin_constant_p(add_) &&
+               add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       lwarx   %1,0,%4         \n"
+                       "       addi    %0,%1,%3        \n"
+                       "       stwcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* branch to lwarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:                      "i"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+       else
+#endif
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       lwarx   %1,0,%4         \n"
+                       "       add     %0,%1,%3        \n"
+                       "       stwcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* branch to lwarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:                      "r"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+
+       return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+       uint64 _t;
+       uint64 res;
+
+       /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+       if (__builtin_constant_p(add_) &&
+               add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       ldarx   %1,0,%4         \n"
+                       "       addi    %0,%1,%3        \n"
+                       "       stdcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* branch to ldarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:                      "i"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+       else
+#endif
+               __asm__ __volatile__(
+                       "       sync                            \n"
+                       "       ldarx   %1,0,%4         \n"
+                       "       add     %0,%1,%3        \n"
+                       "       stdcx.  %0,0,%4         \n"
+                       "       bne     $-12            \n"             /* branch to ldarx */
+                       "       isync                           \n"
+:                      "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:                      "r"(add_), "r"(&ptr->value)
+:                      "memory", "cc");
+
+       return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
 /* per architecture manual doubleword accesses have single copy atomicity */
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/generic-xlc.h b/src/include/port/atomics/generic-xlc.h
index 8b5c732..8330b45 100644
--- a/src/include/port/atomics/generic-xlc.h
+++ b/src/include/port/atomics/generic-xlc.h
@@ -18,23 +18,6 @@
 
 #if defined(HAVE_ATOMICS)
 
-#define PG_HAVE_ATOMIC_U32_SUPPORT
-typedef struct pg_atomic_uint32
-{
-       volatile uint32 value;
-} pg_atomic_uint32;
-
-
-/* 64bit atomics are only supported in 64bit mode */
-#ifdef __64BIT__
-#define PG_HAVE_ATOMIC_U64_SUPPORT
-typedef struct pg_atomic_uint64
-{
-       volatile uint64 value pg_attribute_aligned(8);
-} pg_atomic_uint64;
-
-#endif /* __64BIT__ */
-
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
 static inline bool
 pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
@@ -69,33 +52,6 @@ pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
        return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U32
-static inline uint32
-pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
-{
-       uint32 _t;
-       uint32 res;
-
-       /*
-        * xlc has a no-longer-documented __fetch_and_add() intrinsic.  In xlc
-        * 12.01.0000.0000, it emits a leading "sync" and trailing "isync".  In
-        * xlc 13.01.0003.0004, it emits neither.  Hence, using the intrinsic
-        * would add redundant syncs on xlc 12.
-        */
-       __asm__ __volatile__(
-               "       sync                            \n"
-               "       lwarx   %1,0,%4         \n"
-               "       add     %0,%1,%3        \n"
-               "       stwcx.  %0,0,%4         \n"
-               "       bne     $-12            \n"             /* branch to lwarx */
-               "       isync                           \n"
-:              "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:              "r"(add_), "r"(&ptr->value)
-:              "memory", "cc");
-
-       return res;
-}
-
 #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
@@ -115,28 +71,6 @@ pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
        return ret;
 }
 
-#define PG_HAVE_ATOMIC_FETCH_ADD_U64
-static inline uint64
-pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
-{
-       uint64 _t;
-       uint64 res;
-
-       /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
-       __asm__ __volatile__(
-               "       sync                            \n"
-               "       ldarx   %1,0,%4         \n"
-               "       add     %0,%1,%3        \n"
-               "       stdcx.  %0,0,%4         \n"
-               "       bne     $-12            \n"             /* branch to ldarx */
-               "       isync                           \n"
-:              "=&r"(_t), "=&r"(res), "+m"(ptr->value)
-:              "r"(add_), "r"(&ptr->value)
-:              "memory", "cc");
-
-       return res;
-}
-
 #endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
 
 #endif /* defined(HAVE_ATOMICS) */
