On Thu, Aug 1, 2024 at 10:38 AM Andres Freund <and...@anarazel.de> wrote:
> On 2024-08-01 10:09:07 +1200, Thomas Munro wrote:
> > On Thu, Aug 1, 2024 at 7:07 AM Andres Freund <and...@anarazel.de> wrote:
> > > Note that I would like to add a user for S_LOCK_FREE(), to detect repeated
> > > SpinLockRelease():
> > > https://postgr.es/m/20240729182952.hua325647e2ggbsy%40awork3.anarazel.de
> >
> > What about adding a "magic" member in assertion builds?  Here is my
> > attempt at that, in 0002.
>
> That changes the ABI, which we don't want, because it breaks using
> extensions against a differently built postgres.

Yeah, right, bad idea.  Let me think about how to do something like
what you showed, but with the atomics patch...

Hmm.  One of the interesting things about the atomic_flag interface is
that it completely hides the contents of memory.  (Guess: its weird
minimal interface was designed to help weird architectures like
PA-RISC, staying on topic for $SUBJECT; I doubt we'll see such a
system again, but that minimalism is useful for this trick.)  So I
guess we could push the check down to that layer, and choose arbitrary
non-zero values for the arch-x86.h implementation of pg_atomic_flag.
See attached.  Is this on the right track?
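
The core of it, condensed from 0003 below (the particular values are
arbitrary, as long as both are non-zero and distinct):

#define PG_ATOMIC_FLAG_SET 1
#define PG_ATOMIC_FLAG_CLEAR 2

static inline void
pg_atomic_check_flag(volatile pg_atomic_flag *ptr)
{
#if USE_ASSERT_CHECKING
	char		value = ptr->value;

	/* zeroed or otherwise uninitialised memory fails this check */
	Assert(value == PG_ATOMIC_FLAG_SET || value == PG_ATOMIC_FLAG_CLEAR);
#endif
}

Each _impl function then runs that check before touching the flag, and
uses these constants in place of 1/0.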

(Looking ahead, if we eventually move to using <stdatomic.h>, we won't
be able to use atomic_flag due to its lack of a relaxed load anyway, so
we could generalise this to atomic_char (rather than atomic_bool) and
keep using non-zero values.  Presumably at that point we could also
decree that zero-initialised memory is valid for initialising our
spinlocks, but the non-zero scheme seems useful as a defence against
uninitialised objects anyway.)
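
To make that concrete, here is a hypothetical sketch (not part of the
attached patches, and assuming the usual c.h environment for bool and
Assert) of what a <stdatomic.h>-based pg_atomic_flag built on
atomic_char might look like:

#include <stdatomic.h>

typedef atomic_char pg_atomic_flag;

#define PG_ATOMIC_FLAG_SET 1		/* arbitrary, but both non-zero */
#define PG_ATOMIC_FLAG_CLEAR 2

static inline void
pg_atomic_init_flag(volatile pg_atomic_flag *ptr)
{
	atomic_store_explicit(ptr, PG_ATOMIC_FLAG_CLEAR, memory_order_relaxed);
}

static inline bool
pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr)
{
	/* acquire semantics when we win the exchange */
	return atomic_exchange_explicit(ptr, PG_ATOMIC_FLAG_SET,
									memory_order_acquire) == PG_ATOMIC_FLAG_CLEAR;
}

static inline bool
pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr)
{
	/* the relaxed load that plain atomic_flag can't give us */
	char		value = atomic_load_explicit(ptr, memory_order_relaxed);

	/* zero-initialised (i.e. uninitialised) flags fail this check */
	Assert(value == PG_ATOMIC_FLAG_SET || value == PG_ATOMIC_FLAG_CLEAR);
	return value == PG_ATOMIC_FLAG_CLEAR;
}

static inline void
pg_atomic_clear_flag(volatile pg_atomic_flag *ptr)
{
	Assert(atomic_load_explicit(ptr, memory_order_relaxed) ==
		   PG_ATOMIC_FLAG_SET);
	atomic_store_explicit(ptr, PG_ATOMIC_FLAG_CLEAR, memory_order_release);
}

That keeps both the relaxed load for the spin loop and the defence
against uninitialised objects.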

> I don't really see a reason to avoid having S_LOCK_FREE(), am I missing
> something? Previously the semaphore fallback was a reason, but that's gone
> now...

Sure, but if it's just for assertions, we don't need it.  Or any of
the S_XXX stuff.
From 8981ac9dcf0a76d4f75a1d2c71822579e1b18a93 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Wed, 31 Jul 2024 13:11:35 +1200
Subject: [PATCH v3 1/3] Use atomics API to implement spinlocks.

Since our spinlock API pre-dates our C11-style atomics API by decades,
it had its own hand-crafted operations written in assembler.  Use the
atomics API instead, to simplify and de-duplicate.  We couldn't have
done this earlier, because that'd be circular: atomics were simulated
with spinlocks in --disable-atomics builds.  Commit 81385261 removed
that option, so now we can delete most of the system-specific spinlock code
and just redirect everything to pg_atomic_flag.

The main special knowledge embodied in the hand-crafted code was the
relaxed load of the lock value before attempting to test-and-set, while
spinning.  That is retained in simplified form in the new coding.
---
 configure                              |  22 -
 configure.ac                           |  19 -
 src/Makefile.global.in                 |   3 -
 src/backend/port/Makefile              |  12 -
 src/backend/port/meson.build           |   2 +-
 src/backend/port/tas/dummy.s           |   0
 src/backend/port/tas/sunstudio_sparc.s |  53 --
 src/backend/port/tas/sunstudio_x86.s   |  43 --
 src/backend/storage/lmgr/s_lock.c      | 128 +----
 src/include/storage/s_lock.h           | 749 -------------------------
 src/include/storage/spin.h             |  70 ++-
 src/template/linux                     |  15 -
 src/template/solaris                   |  15 -
 src/test/regress/regress.c             |  25 +-
 14 files changed, 85 insertions(+), 1071 deletions(-)
 delete mode 100644 src/backend/port/tas/dummy.s
 delete mode 100644 src/backend/port/tas/sunstudio_sparc.s
 delete mode 100644 src/backend/port/tas/sunstudio_x86.s
 delete mode 100644 src/include/storage/s_lock.h

diff --git a/configure b/configure
index 8f684f7945e..e2267837b7d 100755
--- a/configure
+++ b/configure
@@ -731,7 +731,6 @@ PKG_CONFIG_LIBDIR
 PKG_CONFIG_PATH
 PKG_CONFIG
 DLSUFFIX
-TAS
 GCC
 CPP
 CFLAGS_SL
@@ -3021,12 +3020,6 @@ $as_echo "$template" >&6; }
 PORTNAME=$template
 
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -7770,20 +7763,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}"
-
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-
-
 
 cat >>confdefs.h <<_ACEOF
 #define DLSUFFIX "$DLSUFFIX"
@@ -19924,7 +19903,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
-    "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;;
     "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;;
     "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;;
     "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;;
diff --git a/configure.ac b/configure.ac
index 75b73532fe0..59c3b7e3d35 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,12 +95,6 @@ AC_MSG_RESULT([$template])
 PORTNAME=$template
 AC_SUBST(PORTNAME)
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -740,19 +734,6 @@ AC_PROG_CPP
 AC_SUBST(GCC)
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}])
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-AC_SUBST(TAS)
-
 AC_SUBST(DLSUFFIX)dnl
 AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"],
                    [Define to the file name extension of dynamically-loadable modules.])
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 83b91fe9167..0301f463027 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -771,9 +771,6 @@ ifeq ($(PORTNAME),win32)
 LIBS += -lws2_32
 endif
 
-# Not really standard libc functions, used by the backend.
-TAS         = @TAS@
-
 
 ##########################################################################
 #
diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile
index 47338d99229..8613ac01aff 100644
--- a/src/backend/port/Makefile
+++ b/src/backend/port/Makefile
@@ -22,7 +22,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	$(TAS) \
 	atomics.o \
 	pg_sema.o \
 	pg_shmem.o
@@ -33,16 +32,5 @@ endif
 
 include $(top_srcdir)/src/backend/common.mk
 
-tas.o: tas.s
-ifeq ($(SUN_STUDIO_CC), yes)
-# preprocess assembler file with cpp
-	$(CC) $(CFLAGS) -c -P $<
-	mv $*.i $*_cpp.s
-	$(CC) $(CFLAGS) -c $*_cpp.s -o $@
-else
-	$(CC) $(CFLAGS) -c $<
-endif
-
 clean:
-	rm -f tas_cpp.s
 	$(MAKE) -C win32 clean
diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build
index 7820e86016d..3270ffb7030 100644
--- a/src/backend/port/meson.build
+++ b/src/backend/port/meson.build
@@ -30,4 +30,4 @@ if host_system == 'windows'
 endif
 
 # autoconf generates the file there, ensure we get a conflict
-generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c', 'tas.s']}
+generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c']}
diff --git a/src/backend/port/tas/dummy.s b/src/backend/port/tas/dummy.s
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s
deleted file mode 100644
index 3400713afd5..00000000000
--- a/src/backend/port/tas/sunstudio_sparc.s
+++ /dev/null
@@ -1,53 +0,0 @@
-!-------------------------------------------------------------------------
-!
-! sunstudio_sparc.s
-!	  compare and swap for Sun Studio on Sparc
-!
-! Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-! Portions Copyright (c) 1994, Regents of the University of California
-!
-! IDENTIFICATION
-!	  src/backend/port/tas/sunstudio_sparc.s
-!
-!-------------------------------------------------------------------------
-
-! Fortunately the Sun compiler can process cpp conditionals with -P
-
-! '/' is the comment for x86, while '!' is the comment for Sparc
-
-#if defined(__sparcv9) || defined(__sparc)
-
-	.section        ".text"
-	.align  8
-	.skip   24
-	.align  4
-
-	.global pg_atomic_cas
-pg_atomic_cas:
-
-	! "cas" only works on sparcv9 and sparcv8plus chips, and
-	! requires a compiler targeting these CPUs.  It will fail
-	! on a compiler targeting sparcv8, and of course will not
-	! be understood by a sparcv8 CPU.  gcc continues to use
-	! "ldstub" because it targets sparcv7.
-	!
-	! There is actually a trick for embedding "cas" in a
-	! sparcv8-targeted compiler, but it can only be run
-	! on a sparcv8plus/v9 cpus:
-	!
-	!   http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il
-	!
-	! NB: We're assuming we're running on a TSO system here - solaris
-	! userland luckily always has done so.
-
-#if defined(__sparcv9) || defined(__sparcv8plus)
-	cas     [%o0],%o2,%o1
-#else
-	ldstub [%o0],%o1
-#endif
-	mov     %o1,%o0
-	retl
-	nop
-	.type   pg_atomic_cas,2
-	.size   pg_atomic_cas,(.-pg_atomic_cas)
-#endif
diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s
deleted file mode 100644
index b4608a9ceb2..00000000000
--- a/src/backend/port/tas/sunstudio_x86.s
+++ /dev/null
@@ -1,43 +0,0 @@
-/-------------------------------------------------------------------------
-/
-/ sunstudio_x86.s
-/	  compare and swap for Sun Studio on x86
-/
-/ Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-/ Portions Copyright (c) 1994, Regents of the University of California
-/
-/ IDENTIFICATION
-/	  src/backend/port/tas/sunstudio_x86.s
-/
-/-------------------------------------------------------------------------
-
-/ Fortunately the Sun compiler can process cpp conditionals with -P
-
-/ '/' is the comment for x86, while '!' is the comment for Sparc
-
-	.file   "tas.s"
-
-#if defined(__amd64)
-	.code64
-#endif
-
-	.globl pg_atomic_cas
-	.type pg_atomic_cas, @function
-
-	.section .text, "ax"
-	.align 16
-
-pg_atomic_cas:
-#if defined(__amd64)
-	movl       %edx,%eax
-	lock
-	cmpxchgl   %esi,(%rdi)
-#else
-	movl    4(%esp), %edx
-	movl    8(%esp), %ecx
-	movl    12(%esp), %eax
-	lock
-	cmpxchgl %ecx, (%edx)
-#endif
-	ret
-	.size pg_atomic_cas, . - pg_atomic_cas
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 69549a65dba..18a98b6e638 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -52,7 +52,7 @@
 
 #include "common/pg_prng.h"
 #include "port/atomics.h"
-#include "storage/s_lock.h"
+#include "storage/spin.h"
 #include "utils/wait_event.h"
 
 #define MIN_SPINS_PER_DELAY 10
@@ -93,7 +93,7 @@ s_lock_stuck(const char *file, int line, const char *func)
 }
 
 /*
- * s_lock(lock) - platform-independent portion of waiting for a spinlock.
+ * s_lock(lock) - out-of-line portion of waiting for a spinlock.
  */
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
@@ -102,8 +102,27 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 
 	init_spin_delay(&delayStatus, file, line, func);
 
-	while (TAS_SPIN(lock))
+	for (;;)
 	{
+		bool		probably_free = true;
+
+#if defined(__i386__) || defined(__x86_64__) || \
+	defined(_M_IX86) || defined(_M_AMD64) || \
+	defined(__ppc__) || defined(__powerpc__) || \
+	defined(__ppc64__) || defined(__powerpc64__)
+
+
+		/*
+		 * On these architectures, it is known to be more efficient to test
+		 * the lock with a relaxed load first, while spinning.
+		 */
+		probably_free = pg_atomic_unlocked_test_flag(lock);
+#endif
+
+		/* Try to get the lock. */
+		if (probably_free && pg_atomic_test_set_flag(lock))
+			break;
+
 		perform_spin_delay(&delayStatus);
 	}
 
@@ -112,14 +131,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 	return delayStatus.delays;
 }
 
-#ifdef USE_DEFAULT_S_UNLOCK
-void
-s_unlock(volatile slock_t *lock)
-{
-	*lock = 0;
-}
-#endif
-
 /*
  * Wait while spinning on a contended spinlock.
  */
@@ -127,7 +138,7 @@ void
 perform_spin_delay(SpinDelayStatus *status)
 {
 	/* CPU-specific delay each time through the loop */
-	SPIN_DELAY();
+	pg_spin_delay();
 
 	/* Block the process every spins_per_delay tries */
 	if (++(status->spins) >= spins_per_delay)
@@ -230,96 +241,3 @@ update_spins_per_delay(int shared_spins_per_delay)
 	 */
 	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
 }
-
-
-/*****************************************************************************/
-#if defined(S_LOCK_TEST)
-
-/*
- * test program for verifying a port's spinlock support.
- */
-
-struct test_lock_struct
-{
-	char		pad1;
-	slock_t		lock;
-	char		pad2;
-};
-
-volatile struct test_lock_struct test_lock;
-
-int
-main()
-{
-	pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
-
-	test_lock.pad1 = test_lock.pad2 = 0x44;
-
-	S_INIT_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not initialized\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not locked\n");
-		return 1;
-	}
-
-	S_UNLOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not unlocked\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not re-locked\n");
-		return 1;
-	}
-
-	printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
-	printf("             exit with a 'stuck spinlock' message\n");
-	printf("             if S_LOCK() and TAS() are working.\n");
-	fflush(stdout);
-
-	s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
-
-	printf("S_LOCK_TEST: failed, lock not locked\n");
-	return 1;
-}
-
-#endif							/* S_LOCK_TEST */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
deleted file mode 100644
index e94ed5f48bd..00000000000
--- a/src/include/storage/s_lock.h
+++ /dev/null
@@ -1,749 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * s_lock.h
- *	   Implementation of spinlocks.
- *
- *	NOTE: none of the macros in this file are intended to be called directly.
- *	Call them through the macros in spin.h.
- *
- *	The following hardware-dependent macros must be provided for each
- *	supported platform:
- *
- *	void S_INIT_LOCK(slock_t *lock)
- *		Initialize a spinlock (to the unlocked state).
- *
- *	int S_LOCK(slock_t *lock)
- *		Acquire a spinlock, waiting if necessary.
- *		Time out and abort() if unable to acquire the lock in a
- *		"reasonable" amount of time --- typically ~ 1 minute.
- *		Should return number of "delays"; see s_lock.c
- *
- *	void S_UNLOCK(slock_t *lock)
- *		Unlock a previously acquired lock.
- *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns true if free, false if locked.
- *		This does *not* change the state of the lock.
- *
- *	void SPIN_DELAY(void)
- *		Delay operation to occur inside spinlock wait loop.
- *
- *	Note to implementors: there are default implementations for all these
- *	macros at the bottom of the file.  Check if your platform can use
- *	these or needs to override them.
- *
- *  Usually, S_LOCK() is implemented in terms of even lower-level macros
- *	TAS() and TAS_SPIN():
- *
- *	int TAS(slock_t *lock)
- *		Atomic test-and-set instruction.  Attempt to acquire the lock,
- *		but do *not* wait.	Returns 0 if successful, nonzero if unable
- *		to acquire the lock.
- *
- *	int TAS_SPIN(slock_t *lock)
- *		Like TAS(), but this version is used when waiting for a lock
- *		previously found to be contended.  By default, this is the
- *		same as TAS(), but on some architectures it's better to poll a
- *		contended lock using an unlocked instruction and retry the
- *		atomic test-and-set only when it appears free.
- *
- *	TAS() and TAS_SPIN() are NOT part of the API, and should never be called
- *	directly.
- *
- *	CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
- *	failure to acquire a lock even when the lock is not locked.  For example,
- *	on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
- *	always be used, even if you are certain the lock is free.
- *
- *	It is the responsibility of these macros to make sure that the compiler
- *	does not re-order accesses to shared memory to precede the actual lock
- *	acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
- *	was the caller's responsibility, which meant that callers had to use
- *	volatile-qualified pointers to refer to both the spinlock itself and the
- *	shared data being accessed within the spinlocked critical section.  This
- *	was notationally awkward, easy to forget (and thus error-prone), and
- *	prevented some useful compiler optimizations.  For these reasons, we
- *	now require that the macros themselves prevent compiler re-ordering,
- *	so that the caller doesn't need to take special precautions.
- *
- *	On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- *	S_UNLOCK() macros must further include hardware-level memory fence
- *	instructions to prevent similar re-ordering at the hardware level.
- *	TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- *	the macro are not executed until the lock has been obtained.  Conversely,
- *	S_UNLOCK() must guarantee that loads and stores issued before the macro
- *	have been executed before the lock is released.
- *
- *	On most supported platforms, TAS() uses a tas() function written
- *	in assembly language to execute a hardware atomic-test-and-set
- *	instruction.  Equivalent OS-supplied mutex routines could be used too.
- *
- *
- * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *	  src/include/storage/s_lock.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef S_LOCK_H
-#define S_LOCK_H
-
-#ifdef FRONTEND
-#error "s_lock.h may not be included from frontend code"
-#endif
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-/*
- * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- *
- * We use the int-width variant of the builtin because it works on more chips
- * than other widths.
- */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
-#ifdef HAVE_GCC__SYNC_INT32_TAS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-/*
- * Using an ISB instruction to delay in spinlock loops appears beneficial on
- * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
- * and ISB doesn't exist at all on pre-v7 ARM chips.
- */
-#if defined(__aarch64__)
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	__asm__ __volatile__(
-		" isb;				\n");
-}
-
-#endif	 /* __aarch64__ */
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-#endif	 /* __arm__ || __arm || __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-/*
- * Solaris has always run sparc processors in TSO (total store) mode, but
- * linux didn't use to and the *BSDs still don't. So, be careful about
- * acquire/release semantics. The CPU will treat superfluous members as
- * NOPs, so it's just code space.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res;
-
-	/*
-	 *	See comment in src/backend/port/tas/sunstudio_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-#if defined(__sparcv7) || defined(__sparc_v7__)
-	/*
-	 * No stbar or membar available, luckily no actually produced hardware
-	 * requires a barrier.
-	 */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-	/* stbar is available (and required for both PSO, RMO), membar isn't */
-	__asm__ __volatile__ ("stbar	 \n":::"memory");
-#else
-	/*
-	 * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
-	 * barrier for sparcv8+ upwards.
-	 */
-	__asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
-#endif
-	return (int) _res;
-}
-
-#if defined(__sparcv7) || defined(__sparc_v7__)
-/*
- * No stbar or membar available, luckily no actually produced hardware
- * requires a barrier.  We fall through to the default gcc definition of
- * S_UNLOCK in this case.
- */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-/* stbar is available (and required for both PSO, RMO), membar isn't */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("stbar	 \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-/*
- * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
- * release barrier for sparcv8+ upwards.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * The second operand of addi can hold a constant zero or a register number,
- * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
- * base register"; most operands having this register-or-zero property are
- * address bases, e.g. the second operand of lwax.
- *
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * But if the spinlock is in ordinary memory, we can use lwsync instead for
- * better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-"	lwarx   %0,0,%3,1	\n"
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f			\n"
-"1: \n"
-"	li      %1,1		\n"
-"	b       3f			\n"
-"2: \n"
-"	lwsync				\n"
-"	li      %1,0		\n"
-"3: \n"
-:	"=&b"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * But we can use lwsync instead for better performance.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* powerpc */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * Original MIPS-I processors lacked the LL/SC instructions, but if we are
- * so unfortunate as to be running on one of those, we expect that the kernel
- * will handle the illegal-instruction traps and emulate them for us.  On
- * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
- * choice because any other synchronization method must involve a kernel
- * call.  Unfortunately, many toolchains still default to MIPS-I as the
- * codegen target; if the symbol __mips shows that that's the case, we
- * have to force the assembler to accept LL/SC.
- *
- * R10000 and up processors require a separate SYNC, which has the same
- * issues as LL/SC.
- */
-#if __mips < 2
-#define MIPS_SET_MIPS2	"       .set mips2          \n"
-#else
-#define MIPS_SET_MIPS2
-#endif
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile slock_t *_l = lock;
-	int			_res;
-	int			_tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		MIPS_SET_MIPS2
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:		/* no inputs */
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		MIPS_SET_MIPS2 \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              " \
-:		/* no outputs */ \
-:		/* no inputs */	\
-:		"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-
-/*
- * If we have no platform-specific knowledge, but we found that the compiler
- * provides __sync_lock_test_and_set(), use that.  Prefer the int-width
- * version over the char-width version if we have both, on the rather dubious
- * grounds that that's known to be more likely to work in the ARM ecosystem.
- * (But we dealt with ARM above.)
- */
-#if !defined(HAS_TEST_AND_SET)
-
-#if defined(HAVE_GCC__SYNC_INT32_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#elif defined(HAVE_GCC__SYNC_CHAR_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/*
- * Default implementation of S_UNLOCK() for gcc/icc.
- *
- * Note that this implementation is unsafe for any platform that can reorder
- * a memory access (either load or store) after a following store.  That
- * happens not to be possible on x86 and most legacy architectures (some are
- * single-processor!), but many modern systems have weaker memory ordering.
- * Those that do must define their own version of S_UNLOCK() rather than
- * relying on this one.
- */
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
-#endif
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-/* These are in sunstudio_(sparc|x86).s */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef _MSC_VER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause intrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-
-#define S_UNLOCK(lock)	\
-	do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-b...@lists.postgresql.org.
-#endif
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
-#define S_LOCK(lock) \
-	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-/*
- * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
- * is unsafe if the platform can reorder a memory access (either load or
- * store) after a following store; platforms where this is possible must
- * define their own S_UNLOCK.  But CPU reordering is not the only concern:
- * if we simply defined S_UNLOCK() as an inline macro, the compiler might
- * reorder instructions from inside the critical section to occur after the
- * lock release.  Since the compiler probably can't know what the external
- * function s_unlock is doing, putting the same logic there should be adequate.
- * A sufficiently-smart globally optimizing compiler could break that
- * assumption, though, and the cost of a function call for every spinlock
- * release may hurt performance significantly, so we use this implementation
- * only for platforms where we don't know of a suitable intrinsic.  For the
- * most part, those are relatively obscure platform/compiler combinations to
- * which the PostgreSQL project does not have access.
- */
-#define USE_DEFAULT_S_UNLOCK
-extern void s_unlock(volatile slock_t *lock);
-#define S_UNLOCK(lock)		s_unlock(lock)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
-
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
-
-
-/*
- * Platform-independent out-of-line support routines
- */
-extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
-
-/* Support for dynamic adjustment of spins_per_delay */
-#define DEFAULT_SPINS_PER_DELAY  100
-
-extern void set_spins_per_delay(int shared_spins_per_delay);
-extern int	update_spins_per_delay(int shared_spins_per_delay);
-
-/*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
- */
-typedef struct
-{
-	int			spins;
-	int			delays;
-	int			cur_delay;
-	const char *file;
-	int			line;
-	const char *func;
-} SpinDelayStatus;
-
-static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
-{
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
-}
-
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
-
-#endif	 /* S_LOCK_H */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 3ae2a56d073..326a2711f23 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -14,9 +14,11 @@
  *		Acquire a spinlock, waiting if necessary.
  *		Time out and abort() if unable to acquire the lock in a
  *		"reasonable" amount of time --- typically ~ 1 minute.
+ *		Acquire (including read barrier) semantics.
  *
  *	void SpinLockRelease(volatile slock_t *lock)
  *		Unlock a previously acquired lock.
+ *		Release (including write barrier) semantics.
  *
  *	bool SpinLockFree(slock_t *lock)
  *		Tests if the lock is free. Returns true if free, false if locked.
@@ -35,11 +37,6 @@
  *	for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so
  *	it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros.
  *
- *	These macros are implemented in terms of hardware-dependent macros
- *	supplied by s_lock.h.  There is not currently any extra functionality
- *	added by this header, but there has been in the past and may someday
- *	be again.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -51,15 +48,68 @@
 #ifndef SPIN_H
 #define SPIN_H
 
-#include "storage/s_lock.h"
+#ifdef FRONTEND
+#error "spin.h may not be included from frontend code"
+#endif
+
+#include "port/atomics.h"
+
+/* Support for dynamic adjustment of spins_per_delay */
+#define DEFAULT_SPINS_PER_DELAY  100
+
+typedef pg_atomic_flag slock_t;
+
+/*
+ * Support for spin delay which is useful in various places where
+ * spinlock-like procedures take place.
+ */
+typedef struct
+{
+	int			spins;
+	int			delays;
+	int			cur_delay;
+	const char *file;
+	int			line;
+	const char *func;
+} SpinDelayStatus;
+
+static inline void
+init_spin_delay(SpinDelayStatus *status,
+				const char *file, int line, const char *func)
+{
+	status->spins = 0;
+	status->delays = 0;
+	status->cur_delay = 0;
+	status->file = file;
+	status->line = line;
+	status->func = func;
+}
 
+#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
+extern void perform_spin_delay(SpinDelayStatus *status);
+extern void finish_spin_delay(SpinDelayStatus *status);
+extern void set_spins_per_delay(int shared_spins_per_delay);
+extern int	update_spins_per_delay(int shared_spins_per_delay);
 
-#define SpinLockInit(lock)	S_INIT_LOCK(lock)
+/* Out-of-line part of spinlock acquisition. */
+extern int	s_lock(volatile slock_t *lock,
+				   const char *file, int line,
+				   const char *func);
 
-#define SpinLockAcquire(lock) S_LOCK(lock)
+static inline void
+SpinLockInit(volatile slock_t *lock)
+{
+	pg_atomic_init_flag(lock);
+}
 
-#define SpinLockRelease(lock) S_UNLOCK(lock)
+#define SpinLockAcquire(lock)						\
+	(pg_atomic_test_set_flag(lock) ? 0 :			\
+	 s_lock((lock), __FILE__, __LINE__, __func__))
 
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
+static inline void
+SpinLockRelease(volatile slock_t *lock)
+{
+	pg_atomic_clear_flag(lock);
+}
 
 #endif							/* SPIN_H */
diff --git a/src/template/linux b/src/template/linux
index ec3302c4a22..2f04c1a6610 100644
--- a/src/template/linux
+++ b/src/template/linux
@@ -21,19 +21,4 @@ if test "$SUN_STUDIO_CC" = "yes" ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/template/solaris b/src/template/solaris
index f88b1cdad37..f5306b3dd5b 100644
--- a/src/template/solaris
+++ b/src/template/solaris
@@ -13,19 +13,4 @@ if test "$SUN_STUDIO_CC" = yes ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 14aad5a0c6e..0cc0bbe3b40 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -851,32 +851,9 @@ test_spinlock(void)
 		SpinLockAcquire(&struct_w_lock.lock);
 		SpinLockRelease(&struct_w_lock.lock);
 
-		/* test basic operations via underlying S_* API */
-		S_INIT_LOCK(&struct_w_lock.lock);
-		S_LOCK(&struct_w_lock.lock);
-		S_UNLOCK(&struct_w_lock.lock);
-
 		/* and that "contended" acquisition works */
 		s_lock(&struct_w_lock.lock, "testfile", 17, "testfunc");
-		S_UNLOCK(&struct_w_lock.lock);
-
-		/*
-		 * Check, using TAS directly, that a single spin cycle doesn't block
-		 * when acquiring an already acquired lock.
-		 */
-#ifdef TAS
-		S_LOCK(&struct_w_lock.lock);
-
-		if (!TAS(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-
-#ifdef TAS_SPIN
-		if (!TAS_SPIN(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-#endif							/* defined(TAS_SPIN) */
-
-		S_UNLOCK(&struct_w_lock.lock);
-#endif							/* defined(TAS) */
+		SpinLockRelease(&struct_w_lock.lock);
 
 		/*
 		 * Verify that after all of this the non-lock contents are still
-- 
2.45.2

From 3354ee5fd668b00b47920faffe454542a365932f Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 1 Aug 2024 11:00:20 +1200
Subject: [PATCH v3 2/3] Assert that spinlocks are not double-released.

---
 src/include/storage/spin.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 326a2711f23..9414c111db8 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -109,6 +109,12 @@ SpinLockInit(volatile slock_t *lock)
 static inline void
 SpinLockRelease(volatile slock_t *lock)
 {
+	/*
+	 * Use a relaxed load to check that the lock is currently held.  That's
+	 * OK because we expect the caller to be the one that set it.
+	 */
+	Assert(!pg_atomic_unlocked_test_flag(lock));
+
 	pg_atomic_clear_flag(lock);
 }
 
-- 
2.45.2

From 8cf5d1088b22d0261423fe9e9060e6f9066c8fb5 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 1 Aug 2024 11:09:44 +1200
Subject: [PATCH v3 3/3] Assert that pg_atomic_flag is initialized.

On x86, use 1 and 2 as the clear and set values for pg_atomic_flag.
This way we can add a useful assertion that spinlocks, built on top of
pg_atomic_flag, have been initialized before use and are not relying on
zeroed memory.
---
 src/include/port/atomics/arch-x86.h | 48 +++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
index c12f8a60697..b0c4ae566a0 100644
--- a/src/include/port/atomics/arch-x86.h
+++ b/src/include/port/atomics/arch-x86.h
@@ -130,11 +130,41 @@ pg_spin_delay_impl(void)
 
 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
 
+/*
+ * Note: This implementation uses non-zero values to help detect uninitialized
+ * usage on a common platform.  Like the C11 atomic_flag interface that it is
+ * modeled on, pg_atomic_flag never reveals the actual values to callers, and
+ * the contents of memory are not specified.
+ */
+#define PG_ATOMIC_FLAG_SET 1
+#define PG_ATOMIC_FLAG_CLEAR 2
+
+static inline void
+pg_atomic_check_flag(volatile pg_atomic_flag *ptr)
+{
+#if USE_ASSERT_CHECKING
+	char		value = ptr->value;
+
+	/* Sanity check that flag has been initialized. */
+	Assert(value == PG_ATOMIC_FLAG_SET || value == PG_ATOMIC_FLAG_CLEAR);
+#endif
+}
+
+#define PG_HAVE_ATOMIC_INIT_FLAG
+static inline void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	ptr->value = PG_ATOMIC_FLAG_CLEAR;
+	__asm__ __volatile__("" ::: "memory");
+}
+
 #define PG_HAVE_ATOMIC_TEST_SET_FLAG
 static inline bool
 pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
 {
-	char		_res = 1;
+	char		_res = PG_ATOMIC_FLAG_SET;
+
+	pg_atomic_check_flag(ptr);
 
 	__asm__ __volatile__(
 		"	lock			\n"
@@ -142,19 +172,31 @@ pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
 :		"+q"(_res), "+m"(ptr->value)
 :
 :		"memory");
-	return _res == 0;
+	return _res == PG_ATOMIC_FLAG_CLEAR;
 }
 
 #define PG_HAVE_ATOMIC_CLEAR_FLAG
 static inline void
 pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
 {
+	pg_atomic_check_flag(ptr);
+
 	/*
 	 * On a TSO architecture like x86 it's sufficient to use a compiler
 	 * barrier to achieve release semantics.
 	 */
 	__asm__ __volatile__("" ::: "memory");
-	ptr->value = 0;
+	ptr->value = PG_ATOMIC_FLAG_CLEAR;
+}
+
+#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+static inline bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_check_flag(ptr);
+
+	__asm__ __volatile__("" ::: "memory");
+	return ptr->value == PG_ATOMIC_FLAG_CLEAR;
 }
 
 #define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
-- 
2.45.2
