On Tue, Jul 30, 2024 at 12:39 PM Thomas Munro <thomas.mu...@gmail.com> wrote: > On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <hlinn...@iki.fi> wrote: > > I think we should do: > > > > #ifdef _M_AMD64 > > #define __x86_64__ > > #endif > > > > somewhere, perhaps in src/include/port/win32.h.
I suppose we could define our own PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64} in one central place, instead. Draft patch for illustration.
From 4df24c6fe7370cdeacd4e794f4ccc6202a909e62 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Thu, 1 Aug 2024 16:38:05 +1200 Subject: [PATCH] Standardize macros for detecting architectures. Instead of repeating multiple compilers' architecture macros throughout the tree, detect them in one central place, and define our own macros of the form: PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64} This fixes the problem that MSVC builds were unintentionally using suboptimal fallback code defined by "port/atomics.h", due to inconsistent testing for macros. A couple of other places were also affected. XXX This patch doesn't adjust s_lock.h, because it's complicated, full of old dead sub-architectures, and a nearby patch proposes to delete it... Discussion: https://postgr.es/m/CA%2BhUKGKAf_i6w7hB_3pqZXQeqn%2BixvY%2BCMps_n%3DmJ5HAatMjMw%40mail.gmail.com --- contrib/pgcrypto/crypt-blowfish.c | 4 ++-- src/include/c.h | 33 +++++++++++++++++++++++++++++ src/include/port/atomics.h | 6 +++--- src/include/port/atomics/arch-x86.h | 16 +++++++------- src/include/port/pg_bitutils.h | 6 +++--- src/include/port/simd.h | 2 +- src/include/storage/s_lock.h | 2 +- src/port/pg_crc32c_sse42.c | 4 ++-- 8 files changed, 53 insertions(+), 20 deletions(-) diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c index 5a1b1e10091..9c4e02e428b 100644 --- a/contrib/pgcrypto/crypt-blowfish.c +++ b/contrib/pgcrypto/crypt-blowfish.c @@ -38,10 +38,10 @@ #include "px-crypt.h" #include "px.h" -#ifdef __i386__ +#if defined(PG_ARCH_X86_32) #define BF_ASM 0 /* 1 */ #define BF_SCALE 1 -#elif defined(__x86_64__) +#elif defined(PG_ARCH_X86_64) #define BF_ASM 0 #define BF_SCALE 1 #else diff --git a/src/include/c.h b/src/include/c.h index dc1841346cd..542cbd33fad 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -425,6 +425,39 @@ typedef void (*pg_funcptr_t) (void); #define HAVE_PRAGMA_GCC_SYSTEM_HEADER 1 #endif +/* + * Project-standardized name for CPU architectures, to avoid having to repeat + * the names that different compilers use. + */ +#if defined(__arm__) || defined(__arm) +#define PG_ARCH_ARM_32 +#elif defined(__aarch64__) || defined(_M_ARM64) +#define PG_ARCH_ARM_64 +#elif defined(__mips__) +#define PG_ARCH_MIPS_32 +#elif defined(__mips64__) +#define PG_ARCH_MIPS_64 +#elif defined(__ppc__) || defined(__powerpc__) +#define PG_ARCH_POWER_32 +#elif defined(__ppc64__) || defined(__powerpc64__) +#define PG_ARCH_POWER_64 +#elif defined(__riscv__) +#define PG_ARCH_RISCV_32 +#elif defined(__riscv64__) +#define PG_ARCH_RISCV_64 +#elif defined(__s390__) +#define PG_ARCH_S390_32 +#elif defined(__s390x__) +#define PG_ARCH_S390_64 +#elif defined(__sparc) +#define PG_ARCH_SPARC_32 +#elif defined(__sparcv9) +#define PG_ARCH_SPARC_64 +#elif defined(__i386__) || defined (__i386) || defined(_M_IX86) +#define PG_ARCH_X86_32 +#elif defined(__x86_64__) || defined(__x86_64) || defined (__amd64) +#define PG_ARCH_X86_64 +#endif /* ---------------------------------------------------------------- * Section 2: bool, true, false diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h index c0c8688f736..3300ea54c17 100644 --- a/src/include/port/atomics.h +++ b/src/include/port/atomics.h @@ -63,11 +63,11 @@ * compiler barrier. * */ -#if defined(__arm__) || defined(__arm) || defined(__aarch64__) +#if defined(PG_ARCH_ARM_32) || defined(PG_ARCH_ARM_64) #include "port/atomics/arch-arm.h" -#elif defined(__i386__) || defined(__i386) || defined(__x86_64__) +#elif defined(PG_ARCH_X86_32) || defined(PG_ARCH_X86_64) #include "port/atomics/arch-x86.h" -#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#elif defined(PG_ARCH_POWER_32) || defined (PG_ARCH_POWER_64) #include "port/atomics/arch-ppc.h" #endif diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h index c12f8a60697..9f20edf221f 100644 --- a/src/include/port/atomics/arch-x86.h +++ b/src/include/port/atomics/arch-x86.h @@ -32,10 +32,10 @@ */ #if defined(__GNUC__) || defined(__INTEL_COMPILER) -#if defined(__i386__) || defined(__i386) +#if defined(PG_ARCH_X86_32) #define pg_memory_barrier_impl() \ __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc") -#elif defined(__x86_64__) +#elif defined(PG_ARCH_X86_64) #define pg_memory_barrier_impl() \ __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc") #endif @@ -67,14 +67,14 @@ typedef struct pg_atomic_uint32 * It's too complicated to write inline asm for 64bit types on 32bit and the * 486 can't do it anyway. */ -#ifdef __x86_64__ +#ifdef PG_ARCH_X86_64 #define PG_HAVE_ATOMIC_U64_SUPPORT typedef struct pg_atomic_uint64 { /* alignment guaranteed due to being on a 64bit platform */ volatile uint64 value; } pg_atomic_uint64; -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ @@ -109,7 +109,7 @@ pg_spin_delay_impl(void) { __asm__ __volatile__(" rep; nop \n"); } -#elif defined(_MSC_VER) && defined(__x86_64__) +#elif defined(_MSC_VER) && defined(PG_ARCH_X86_64) #define PG_HAVE_SPIN_DELAY static __forceinline void pg_spin_delay_impl(void) @@ -192,7 +192,7 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) return res; } -#ifdef __x86_64__ +#ifdef PG_ARCH_X86_64 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 static inline bool @@ -231,7 +231,7 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) return res; } -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ @@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) */ #if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ - defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ + defined(PG_ARCH_X86_64) #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY #endif /* 8 byte single-copy atomicity */ diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h index 4d88478c9c2..39a756b7638 100644 --- a/src/include/port/pg_bitutils.h +++ b/src/include/port/pg_bitutils.h @@ -82,7 +82,7 @@ pg_leftmost_one_pos64(uint64 word) #error must have a working 64-bit integer datatype #endif /* HAVE_LONG_INT_64 */ -#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) +#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64)) unsigned long result; bool non_zero; @@ -155,7 +155,7 @@ pg_rightmost_one_pos64(uint64 word) #error must have a working 64-bit integer datatype #endif /* HAVE_LONG_INT_64 */ -#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) +#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64)) unsigned long result; bool non_zero; @@ -282,7 +282,7 @@ pg_ceil_log2_64(uint64 num) * __builtin_popcount* intrinsic functions as they always emit popcnt * instructions. */ -#if defined(_MSC_VER) && defined(_M_AMD64) +#if defined(_MSC_VER) && defined(PG_ARCH_X86_64) #define HAVE_X86_64_POPCNTQ #endif diff --git a/src/include/port/simd.h b/src/include/port/simd.h index 597496f2fb7..8c3555707e1 100644 --- a/src/include/port/simd.h +++ b/src/include/port/simd.h @@ -18,7 +18,7 @@ #ifndef SIMD_H #define SIMD_H -#if (defined(__x86_64__) || defined(_M_AMD64)) +#ifdef PG_ARCH_X86_64 /* * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume * that compilers targeting this architecture understand SSE2 intrinsics. diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h index e94ed5f48bd..3b9749e6a4b 100644 --- a/src/include/storage/s_lock.h +++ b/src/include/storage/s_lock.h @@ -385,7 +385,7 @@ do \ /* PowerPC */ -#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#if defined(PG_ARCH_POWER_32) || defined(PG_ARCH_POWER_64) #define HAS_TEST_AND_SET typedef unsigned int slock_t; diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 7f88c114800..30c008ce4f4 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -32,7 +32,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) * and performance testing didn't show any performance gain from aligning * the begin address. */ -#ifdef __x86_64__ +#ifdef PG_ARCH_X86_64 while (p + 8 <= pend) { crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p)); @@ -56,7 +56,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len) crc = _mm_crc32_u32(crc, *((const unsigned int *) p)); p += 4; } -#endif /* __x86_64__ */ +#endif /* PG_ARCH_X86_64 */ /* Process any remaining bytes one at a time. */ while (p < pend) -- 2.45.2