On Tue, Jul 30, 2024 at 12:39 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> On Tue, Jul 30, 2024 at 11:16 AM Heikki Linnakangas <hlinn...@iki.fi> wrote:
> > I think we should do:
> >
> > #ifdef _M_AMD64
> > #define __x86_64__
> > #endif
> >
> > somewhere, perhaps in src/include/port/win32.h.

I suppose we could define our own
PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64} in one central
place, instead.  Draft patch for illustration.
From 4df24c6fe7370cdeacd4e794f4ccc6202a909e62 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 1 Aug 2024 16:38:05 +1200
Subject: [PATCH] Standardize macros for detecting architectures.

Instead of repeating multiple compilers' architecture macros throughout
the tree, detect them in one central place, and define our own macros of
the form:

  PG_ARCH_{ARM,MIPS,POWER,RISCV,S390,SPARC,X86}_{32,64}

This fixes the problem that MSVC builds were unintentionally using
suboptimal fallback code defined by "port/atomics.h", due to
inconsistent testing for macros.  A couple of other places were also
affected.

XXX This patch doesn't adjust s_lock.h, because it's complicated, full
of old dead sub-architectures, and a nearby patch proposes to delete
it...

Discussion: https://postgr.es/m/CA%2BhUKGKAf_i6w7hB_3pqZXQeqn%2BixvY%2BCMps_n%3DmJ5HAatMjMw%40mail.gmail.com
---
 contrib/pgcrypto/crypt-blowfish.c   |  4 ++--
 src/include/c.h                     | 33 +++++++++++++++++++++++++++++
 src/include/port/atomics.h          |  6 +++---
 src/include/port/atomics/arch-x86.h | 16 +++++++-------
 src/include/port/pg_bitutils.h      |  6 +++---
 src/include/port/simd.h             |  2 +-
 src/include/storage/s_lock.h        |  2 +-
 src/port/pg_crc32c_sse42.c          |  4 ++--
 8 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/contrib/pgcrypto/crypt-blowfish.c b/contrib/pgcrypto/crypt-blowfish.c
index 5a1b1e10091..9c4e02e428b 100644
--- a/contrib/pgcrypto/crypt-blowfish.c
+++ b/contrib/pgcrypto/crypt-blowfish.c
@@ -38,10 +38,10 @@
 #include "px-crypt.h"
 #include "px.h"
 
-#ifdef __i386__
+#if defined(PG_ARCH_X86_32)
 #define BF_ASM				0	/* 1 */
 #define BF_SCALE			1
-#elif defined(__x86_64__)
+#elif defined(PG_ARCH_X86_64)
 #define BF_ASM				0
 #define BF_SCALE			1
 #else
diff --git a/src/include/c.h b/src/include/c.h
index dc1841346cd..542cbd33fad 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -425,6 +425,39 @@ typedef void (*pg_funcptr_t) (void);
 #define HAVE_PRAGMA_GCC_SYSTEM_HEADER	1
 #endif
 
+/*
+ * Project-standardized name for CPU architectures, to avoid having to repeat
+ * the names that different compilers use.
+ */
+#if defined(__arm__) || defined(__arm)
+#define PG_ARCH_ARM_32
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define PG_ARCH_ARM_64
+#elif defined(__mips__)
+#define PG_ARCH_MIPS_32
+#elif defined(__mips64__)
+#define PG_ARCH_MIPS_64
+#elif defined(__ppc__) || defined(__powerpc__)
+#define PG_ARCH_POWER_32
+#elif defined(__ppc64__) || defined(__powerpc64__)
+#define PG_ARCH_POWER_64
+#elif defined(__riscv__)
+#define PG_ARCH_RISCV_32
+#elif defined(__riscv64__)
+#define PG_ARCH_RISCV_64
+#elif defined(__s390__)
+#define PG_ARCH_S390_32
+#elif defined(__s390x__)
+#define PG_ARCH_S390_64
+#elif defined(__sparc)
+#define PG_ARCH_SPARC_32
+#elif defined(__sparcv9)
+#define PG_ARCH_SPARC_64
+#elif defined(__i386__) || defined (__i386) || defined(_M_IX86)
+#define PG_ARCH_X86_32
+#elif defined(__x86_64__) || defined(__x86_64) || defined (__amd64)
+#define PG_ARCH_X86_64
+#endif
 
 /* ----------------------------------------------------------------
  *				Section 2:	bool, true, false
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
index c0c8688f736..3300ea54c17 100644
--- a/src/include/port/atomics.h
+++ b/src/include/port/atomics.h
@@ -63,11 +63,11 @@
  * compiler barrier.
  *
  */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
+#if defined(PG_ARCH_ARM_32) || defined(PG_ARCH_ARM_64)
 #include "port/atomics/arch-arm.h"
-#elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
+#elif defined(PG_ARCH_X86_32) || defined(PG_ARCH_X86_64)
 #include "port/atomics/arch-x86.h"
-#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#elif defined(PG_ARCH_POWER_32) || defined (PG_ARCH_POWER_64)
 #include "port/atomics/arch-ppc.h"
 #endif
 
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index c12f8a60697..9f20edf221f 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -32,10 +32,10 @@
  */
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
-#if defined(__i386__) || defined(__i386)
+#if defined(PG_ARCH_X86_32)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
-#elif defined(__x86_64__)
+#elif defined(PG_ARCH_X86_64)
 #define pg_memory_barrier_impl()		\
 	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
 #endif
@@ -67,14 +67,14 @@ typedef struct pg_atomic_uint32
  * It's too complicated to write inline asm for 64bit types on 32bit and the
  * 486 can't do it anyway.
  */
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
 #define PG_HAVE_ATOMIC_U64_SUPPORT
 typedef struct pg_atomic_uint64
 {
 	/* alignment guaranteed due to being on a 64bit platform */
 	volatile uint64 value;
 } pg_atomic_uint64;
-#endif	/* __x86_64__ */
+#endif	/* PG_ARCH_X86_64 */
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
@@ -109,7 +109,7 @@ pg_spin_delay_impl(void)
 {
 	__asm__ __volatile__(" rep; nop			\n");
 }
-#elif defined(_MSC_VER) && defined(__x86_64__)
+#elif defined(_MSC_VER) && defined(PG_ARCH_X86_64)
 #define PG_HAVE_SPIN_DELAY
 static __forceinline void
 pg_spin_delay_impl(void)
@@ -192,7 +192,7 @@ pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
 	return res;
 }
 
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
 static inline bool
@@ -231,7 +231,7 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
 	return res;
 }
 
-#endif /* __x86_64__ */
+#endif /* PG_ARCH_X86_64 */
 
 #endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
 
@@ -241,6 +241,6 @@ pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
  */
 #if defined(__i568__) || defined(__i668__) || /* gcc i586+ */  \
 	(defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
-	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
+	defined(PG_ARCH_X86_64)
 #define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
 #endif /* 8 byte single-copy atomicity */
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index 4d88478c9c2..39a756b7638 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -82,7 +82,7 @@ pg_leftmost_one_pos64(uint64 word)
 #error must have a working 64-bit integer datatype
 #endif							/* HAVE_LONG_INT_64 */
 
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64))
 	unsigned long result;
 	bool		non_zero;
 
@@ -155,7 +155,7 @@ pg_rightmost_one_pos64(uint64 word)
 #error must have a working 64-bit integer datatype
 #endif							/* HAVE_LONG_INT_64 */
 
-#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
+#elif defined(_MSC_VER) && (defined(PG_ARCH_ARM_64) || defined(PG_ARCH_X86_64))
 	unsigned long result;
 	bool		non_zero;
 
@@ -282,7 +282,7 @@ pg_ceil_log2_64(uint64 num)
  * __builtin_popcount* intrinsic functions as they always emit popcnt
  * instructions.
  */
-#if defined(_MSC_VER) && defined(_M_AMD64)
+#if defined(_MSC_VER) && defined(PG_ARCH_X86_64)
 #define HAVE_X86_64_POPCNTQ
 #endif
 
diff --git a/src/include/port/simd.h b/src/include/port/simd.h
index 597496f2fb7..8c3555707e1 100644
--- a/src/include/port/simd.h
+++ b/src/include/port/simd.h
@@ -18,7 +18,7 @@
 #ifndef SIMD_H
 #define SIMD_H
 
-#if (defined(__x86_64__) || defined(_M_AMD64))
+#ifdef PG_ARCH_X86_64
 /*
  * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume
  * that compilers targeting this architecture understand SSE2 intrinsics.
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
index e94ed5f48bd..3b9749e6a4b 100644
--- a/src/include/storage/s_lock.h
+++ b/src/include/storage/s_lock.h
@@ -385,7 +385,7 @@ do \
 
 
 /* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#if defined(PG_ARCH_POWER_32) || defined(PG_ARCH_POWER_64)
 #define HAS_TEST_AND_SET
 
 typedef unsigned int slock_t;
diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c
index 7f88c114800..30c008ce4f4 100644
--- a/src/port/pg_crc32c_sse42.c
+++ b/src/port/pg_crc32c_sse42.c
@@ -32,7 +32,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 	 * and performance testing didn't show any performance gain from aligning
 	 * the begin address.
 	 */
-#ifdef __x86_64__
+#ifdef PG_ARCH_X86_64
 	while (p + 8 <= pend)
 	{
 		crc = (uint32) _mm_crc32_u64(crc, *((const uint64 *) p));
@@ -56,7 +56,7 @@ pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len)
 		crc = _mm_crc32_u32(crc, *((const unsigned int *) p));
 		p += 4;
 	}
-#endif							/* __x86_64__ */
+#endif							/* PG_ARCH_X86_64 */
 
 	/* Process any remaining bytes one at a time. */
 	while (p < pend)
-- 
2.45.2

Reply via email to