On Thu, Aug 1, 2024 at 7:07 AM Andres Freund <and...@anarazel.de> wrote:
> Note that I would like to add a user for S_LOCK_FREE(), to detect repeated
> SpinLockRelease():
> https://postgr.es/m/20240729182952.hua325647e2ggbsy%40awork3.anarazel.de

What about adding a "magic" member in assertion builds?  Here is my
attempt at that, in 0002.
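
To make the shape concrete, 0002 wraps the atomic flag in a struct that
grows an extra member in assertion builds only -- condensed from the
patch below:

    typedef struct
    {
        pg_atomic_flag flag;
    #ifdef USE_ASSERT_CHECKING
        int            magic;   /* set by SpinLockInit(), checked on use */
    #endif
    } slock_t;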

I also realised that we might as well skip the trivial S_XXX macros
and delete s_lock.h.  In this version of 0001 we retain just spin.h,
but s_lock.c still exists to hold the slow path.
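
For quick reference, here is what the public API boils down to in
0001's spin.h: thin wrappers over the atomics API, plus the
out-of-line slow path in s_lock.c:

    #define SpinLockInit(lock) pg_atomic_init_flag(lock)
    #define SpinLockAcquire(lock) \
        (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
    #define SpinLockRelease(lock) pg_atomic_clear_flag(lock)
    #define SpinLockFree(lock) pg_atomic_unlocked_test_flag(lock)
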
From 47d5c4537dd741efc0fd6ac54393d2e7aca7ec8b Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Wed, 31 Jul 2024 13:11:35 +1200
Subject: [PATCH v2 1/2] Use atomics API to implement spinlocks.

Since our spinlock API pre-dates our C11-style atomics API by decades,
it had its own hand-crafted operations written in assembler.  Use the
atomics API instead, to simplify and de-duplicate.  We couldn't have
done this earlier, because that'd be circular: atomics were simulated
with spinlocks in --disable-atomics builds.  Commit 81385261 removed
that option, so now we can delete most of the system-specific spinlock
and just redirect everything to pg_atomic_flag.

The main special knowledge embodied in the hand-crafted code was the
relaxed load of the lock value before attempting to test-and-set, while
spinning.  That is retained in simplified form in the new coding.
---
 configure                              |  22 -
 configure.ac                           |  19 -
 src/Makefile.global.in                 |   3 -
 src/backend/port/Makefile              |  12 -
 src/backend/port/meson.build           |   2 +-
 src/backend/port/tas/dummy.s           |   0
 src/backend/port/tas/sunstudio_sparc.s |  53 --
 src/backend/port/tas/sunstudio_x86.s   |  43 --
 src/backend/storage/lmgr/s_lock.c      | 128 +----
 src/include/storage/s_lock.h           | 749 -------------------------
 src/include/storage/spin.h             |  60 +-
 src/template/linux                     |  15 -
 src/template/solaris                   |  15 -
 src/test/regress/regress.c             |  25 +-
 14 files changed, 75 insertions(+), 1071 deletions(-)
 delete mode 100644 src/backend/port/tas/dummy.s
 delete mode 100644 src/backend/port/tas/sunstudio_sparc.s
 delete mode 100644 src/backend/port/tas/sunstudio_x86.s
 delete mode 100644 src/include/storage/s_lock.h

diff --git a/configure b/configure
index 8f684f7945e..e2267837b7d 100755
--- a/configure
+++ b/configure
@@ -731,7 +731,6 @@ PKG_CONFIG_LIBDIR
 PKG_CONFIG_PATH
 PKG_CONFIG
 DLSUFFIX
-TAS
 GCC
 CPP
 CFLAGS_SL
@@ -3021,12 +3020,6 @@ $as_echo "$template" >&6; }
 PORTNAME=$template
 
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -7770,20 +7763,6 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-ac_config_links="$ac_config_links src/backend/port/tas.s:src/backend/port/tas/${tas_file}"
-
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-
-
 
 cat >>confdefs.h <<_ACEOF
 #define DLSUFFIX "$DLSUFFIX"
@@ -19924,7 +19903,6 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
-    "src/backend/port/tas.s") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/tas.s:src/backend/port/tas/${tas_file}" ;;
     "GNUmakefile") CONFIG_FILES="$CONFIG_FILES GNUmakefile" ;;
     "src/Makefile.global") CONFIG_FILES="$CONFIG_FILES src/Makefile.global" ;;
     "src/backend/port/pg_sema.c") CONFIG_LINKS="$CONFIG_LINKS src/backend/port/pg_sema.c:${SEMA_IMPLEMENTATION}" ;;
diff --git a/configure.ac b/configure.ac
index 75b73532fe0..59c3b7e3d35 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,12 +95,6 @@ AC_MSG_RESULT([$template])
 PORTNAME=$template
 AC_SUBST(PORTNAME)
 
-# Initialize default assumption that we do not need separate assembly code
-# for TAS (test-and-set).  This can be overridden by the template file
-# when it's executed.
-need_tas=no
-tas_file=dummy.s
-
 # Default, works for most platforms, override in template file if needed
 DLSUFFIX=".so"
 
@@ -740,19 +734,6 @@ AC_PROG_CPP
 AC_SUBST(GCC)
 
 
-#
-# Set up TAS assembly code if needed; the template file has now had its
-# chance to request this.
-#
-AC_CONFIG_LINKS([src/backend/port/tas.s:src/backend/port/tas/${tas_file}])
-
-if test "$need_tas" = yes ; then
-  TAS=tas.o
-else
-  TAS=""
-fi
-AC_SUBST(TAS)
-
 AC_SUBST(DLSUFFIX)dnl
 AC_DEFINE_UNQUOTED([DLSUFFIX], ["$DLSUFFIX"],
                    [Define to the file name extension of dynamically-loadable modules.])
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 83b91fe9167..0301f463027 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -771,9 +771,6 @@ ifeq ($(PORTNAME),win32)
 LIBS += -lws2_32
 endif
 
-# Not really standard libc functions, used by the backend.
-TAS         = @TAS@
-
 
 ##########################################################################
 #
diff --git a/src/backend/port/Makefile b/src/backend/port/Makefile
index 47338d99229..8613ac01aff 100644
--- a/src/backend/port/Makefile
+++ b/src/backend/port/Makefile
@@ -22,7 +22,6 @@ top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = \
-	$(TAS) \
 	atomics.o \
 	pg_sema.o \
 	pg_shmem.o
@@ -33,16 +32,5 @@ endif
 
 include $(top_srcdir)/src/backend/common.mk
 
-tas.o: tas.s
-ifeq ($(SUN_STUDIO_CC), yes)
-# preprocess assembler file with cpp
-	$(CC) $(CFLAGS) -c -P $<
-	mv $*.i $*_cpp.s
-	$(CC) $(CFLAGS) -c $*_cpp.s -o $@
-else
-	$(CC) $(CFLAGS) -c $<
-endif
-
 clean:
-	rm -f tas_cpp.s
 	$(MAKE) -C win32 clean
diff --git a/src/backend/port/meson.build b/src/backend/port/meson.build
index 7820e86016d..3270ffb7030 100644
--- a/src/backend/port/meson.build
+++ b/src/backend/port/meson.build
@@ -30,4 +30,4 @@ if host_system == 'windows'
 endif
 
 # autoconf generates the file there, ensure we get a conflict
-generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c', 'tas.s']}
+generated_sources_ac += {'src/backend/port': ['pg_sema.c', 'pg_shmem.c']}
diff --git a/src/backend/port/tas/dummy.s b/src/backend/port/tas/dummy.s
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/backend/port/tas/sunstudio_sparc.s b/src/backend/port/tas/sunstudio_sparc.s
deleted file mode 100644
index 3400713afd5..00000000000
--- a/src/backend/port/tas/sunstudio_sparc.s
+++ /dev/null
@@ -1,53 +0,0 @@
-!-------------------------------------------------------------------------
-!
-! sunstudio_sparc.s
-!	  compare and swap for Sun Studio on Sparc
-!
-! Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-! Portions Copyright (c) 1994, Regents of the University of California
-!
-! IDENTIFICATION
-!	  src/backend/port/tas/sunstudio_sparc.s
-!
-!-------------------------------------------------------------------------
-
-! Fortunately the Sun compiler can process cpp conditionals with -P
-
-! '/' is the comment for x86, while '!' is the comment for Sparc
-
-#if defined(__sparcv9) || defined(__sparc)
-
-	.section        ".text"
-	.align  8
-	.skip   24
-	.align  4
-
-	.global pg_atomic_cas
-pg_atomic_cas:
-
-	! "cas" only works on sparcv9 and sparcv8plus chips, and
-	! requires a compiler targeting these CPUs.  It will fail
-	! on a compiler targeting sparcv8, and of course will not
-	! be understood by a sparcv8 CPU.  gcc continues to use
-	! "ldstub" because it targets sparcv7.
-	!
-	! There is actually a trick for embedding "cas" in a
-	! sparcv8-targeted compiler, but it can only be run
-	! on a sparcv8plus/v9 cpus:
-	!
-	!   http://cvs.opensolaris.org/source/xref/on/usr/src/lib/libc/sparc/threads/sparc.il
-	!
-	! NB: We're assuming we're running on a TSO system here - solaris
-	! userland luckily always has done so.
-
-#if defined(__sparcv9) || defined(__sparcv8plus)
-	cas     [%o0],%o2,%o1
-#else
-	ldstub [%o0],%o1
-#endif
-	mov     %o1,%o0
-	retl
-	nop
-	.type   pg_atomic_cas,2
-	.size   pg_atomic_cas,(.-pg_atomic_cas)
-#endif
diff --git a/src/backend/port/tas/sunstudio_x86.s b/src/backend/port/tas/sunstudio_x86.s
deleted file mode 100644
index b4608a9ceb2..00000000000
--- a/src/backend/port/tas/sunstudio_x86.s
+++ /dev/null
@@ -1,43 +0,0 @@
-/-------------------------------------------------------------------------
-/
-/ sunstudio_x86.s
-/	  compare and swap for Sun Studio on x86
-/
-/ Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
-/ Portions Copyright (c) 1994, Regents of the University of California
-/
-/ IDENTIFICATION
-/	  src/backend/port/tas/sunstudio_x86.s
-/
-/-------------------------------------------------------------------------
-
-/ Fortunately the Sun compiler can process cpp conditionals with -P
-
-/ '/' is the comment for x86, while '!' is the comment for Sparc
-
-	.file   "tas.s"
-
-#if defined(__amd64)
-	.code64
-#endif
-
-	.globl pg_atomic_cas
-	.type pg_atomic_cas, @function
-
-	.section .text, "ax"
-	.align 16
-
-pg_atomic_cas:
-#if defined(__amd64)
-	movl       %edx,%eax
-	lock
-	cmpxchgl   %esi,(%rdi)
-#else
-	movl    4(%esp), %edx
-	movl    8(%esp), %ecx
-	movl    12(%esp), %eax
-	lock
-	cmpxchgl %ecx, (%edx)
-#endif
-	ret
-	.size pg_atomic_cas, . - pg_atomic_cas
diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 69549a65dba..18a98b6e638 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -52,7 +52,7 @@
 
 #include "common/pg_prng.h"
 #include "port/atomics.h"
-#include "storage/s_lock.h"
+#include "storage/spin.h"
 #include "utils/wait_event.h"
 
 #define MIN_SPINS_PER_DELAY 10
@@ -93,7 +93,7 @@ s_lock_stuck(const char *file, int line, const char *func)
 }
 
 /*
- * s_lock(lock) - platform-independent portion of waiting for a spinlock.
+ * s_lock(lock) - out-of-line portion of waiting for a spinlock.
  */
 int
 s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
@@ -102,8 +102,27 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 
 	init_spin_delay(&delayStatus, file, line, func);
 
-	while (TAS_SPIN(lock))
+	for (;;)
 	{
+		bool		probably_free = true;
+
+#if defined(__i386__) || defined(__x86_64__) || \
+	defined(_M_IX86) || defined(_M_AMD64) || \
+	defined(__ppc__) || defined(__powerpc__) || \
+	defined(__ppc64__) || defined(__powerpc64__)
+
+
+		/*
+		 * On these architectures, it is known to be more efficient to test
+		 * the lock with a relaxed load first, while spinning.
+		 */
+		probably_free = pg_atomic_unlocked_test_flag(lock);
+#endif
+
+		/* Try to get the lock. */
+		if (probably_free && pg_atomic_test_set_flag(lock))
+			break;
+
 		perform_spin_delay(&delayStatus);
 	}
 
@@ -112,14 +131,6 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 	return delayStatus.delays;
 }
 
-#ifdef USE_DEFAULT_S_UNLOCK
-void
-s_unlock(volatile slock_t *lock)
-{
-	*lock = 0;
-}
-#endif
-
 /*
  * Wait while spinning on a contended spinlock.
  */
@@ -127,7 +138,7 @@ void
 perform_spin_delay(SpinDelayStatus *status)
 {
 	/* CPU-specific delay each time through the loop */
-	SPIN_DELAY();
+	pg_spin_delay();
 
 	/* Block the process every spins_per_delay tries */
 	if (++(status->spins) >= spins_per_delay)
@@ -230,96 +241,3 @@ update_spins_per_delay(int shared_spins_per_delay)
 	 */
 	return (shared_spins_per_delay * 15 + spins_per_delay) / 16;
 }
-
-
-/*****************************************************************************/
-#if defined(S_LOCK_TEST)
-
-/*
- * test program for verifying a port's spinlock support.
- */
-
-struct test_lock_struct
-{
-	char		pad1;
-	slock_t		lock;
-	char		pad2;
-};
-
-volatile struct test_lock_struct test_lock;
-
-int
-main()
-{
-	pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));
-
-	test_lock.pad1 = test_lock.pad2 = 0x44;
-
-	S_INIT_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not initialized\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not locked\n");
-		return 1;
-	}
-
-	S_UNLOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (!S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not unlocked\n");
-		return 1;
-	}
-
-	S_LOCK(&test_lock.lock);
-
-	if (test_lock.pad1 != 0x44 || test_lock.pad2 != 0x44)
-	{
-		printf("S_LOCK_TEST: failed, declared datatype is wrong size\n");
-		return 1;
-	}
-
-	if (S_LOCK_FREE(&test_lock.lock))
-	{
-		printf("S_LOCK_TEST: failed, lock not re-locked\n");
-		return 1;
-	}
-
-	printf("S_LOCK_TEST: this will print %d stars and then\n", NUM_DELAYS);
-	printf("             exit with a 'stuck spinlock' message\n");
-	printf("             if S_LOCK() and TAS() are working.\n");
-	fflush(stdout);
-
-	s_lock(&test_lock.lock, __FILE__, __LINE__, __func__);
-
-	printf("S_LOCK_TEST: failed, lock not locked\n");
-	return 1;
-}
-
-#endif							/* S_LOCK_TEST */
diff --git a/src/include/storage/s_lock.h b/src/include/storage/s_lock.h
deleted file mode 100644
index e94ed5f48bd..00000000000
--- a/src/include/storage/s_lock.h
+++ /dev/null
@@ -1,749 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * s_lock.h
- *	   Implementation of spinlocks.
- *
- *	NOTE: none of the macros in this file are intended to be called directly.
- *	Call them through the macros in spin.h.
- *
- *	The following hardware-dependent macros must be provided for each
- *	supported platform:
- *
- *	void S_INIT_LOCK(slock_t *lock)
- *		Initialize a spinlock (to the unlocked state).
- *
- *	int S_LOCK(slock_t *lock)
- *		Acquire a spinlock, waiting if necessary.
- *		Time out and abort() if unable to acquire the lock in a
- *		"reasonable" amount of time --- typically ~ 1 minute.
- *		Should return number of "delays"; see s_lock.c
- *
- *	void S_UNLOCK(slock_t *lock)
- *		Unlock a previously acquired lock.
- *
- *	bool S_LOCK_FREE(slock_t *lock)
- *		Tests if the lock is free. Returns true if free, false if locked.
- *		This does *not* change the state of the lock.
- *
- *	void SPIN_DELAY(void)
- *		Delay operation to occur inside spinlock wait loop.
- *
- *	Note to implementors: there are default implementations for all these
- *	macros at the bottom of the file.  Check if your platform can use
- *	these or needs to override them.
- *
- *  Usually, S_LOCK() is implemented in terms of even lower-level macros
- *	TAS() and TAS_SPIN():
- *
- *	int TAS(slock_t *lock)
- *		Atomic test-and-set instruction.  Attempt to acquire the lock,
- *		but do *not* wait.	Returns 0 if successful, nonzero if unable
- *		to acquire the lock.
- *
- *	int TAS_SPIN(slock_t *lock)
- *		Like TAS(), but this version is used when waiting for a lock
- *		previously found to be contended.  By default, this is the
- *		same as TAS(), but on some architectures it's better to poll a
- *		contended lock using an unlocked instruction and retry the
- *		atomic test-and-set only when it appears free.
- *
- *	TAS() and TAS_SPIN() are NOT part of the API, and should never be called
- *	directly.
- *
- *	CAUTION: on some platforms TAS() and/or TAS_SPIN() may sometimes report
- *	failure to acquire a lock even when the lock is not locked.  For example,
- *	on Alpha TAS() will "fail" if interrupted.  Therefore a retry loop must
- *	always be used, even if you are certain the lock is free.
- *
- *	It is the responsibility of these macros to make sure that the compiler
- *	does not re-order accesses to shared memory to precede the actual lock
- *	acquisition, or follow the lock release.  Prior to PostgreSQL 9.5, this
- *	was the caller's responsibility, which meant that callers had to use
- *	volatile-qualified pointers to refer to both the spinlock itself and the
- *	shared data being accessed within the spinlocked critical section.  This
- *	was notationally awkward, easy to forget (and thus error-prone), and
- *	prevented some useful compiler optimizations.  For these reasons, we
- *	now require that the macros themselves prevent compiler re-ordering,
- *	so that the caller doesn't need to take special precautions.
- *
- *	On platforms with weak memory ordering, the TAS(), TAS_SPIN(), and
- *	S_UNLOCK() macros must further include hardware-level memory fence
- *	instructions to prevent similar re-ordering at the hardware level.
- *	TAS() and TAS_SPIN() must guarantee that loads and stores issued after
- *	the macro are not executed until the lock has been obtained.  Conversely,
- *	S_UNLOCK() must guarantee that loads and stores issued before the macro
- *	have been executed before the lock is released.
- *
- *	On most supported platforms, TAS() uses a tas() function written
- *	in assembly language to execute a hardware atomic-test-and-set
- *	instruction.  Equivalent OS-supplied mutex routines could be used too.
- *
- *
- * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *	  src/include/storage/s_lock.h
- *
- *-------------------------------------------------------------------------
- */
-#ifndef S_LOCK_H
-#define S_LOCK_H
-
-#ifdef FRONTEND
-#error "s_lock.h may not be included from frontend code"
-#endif
-
-#if defined(__GNUC__) || defined(__INTEL_COMPILER)
-/*************************************************************************
- * All the gcc inlines
- * Gcc consistently defines the CPU as __cpu__.
- * Other compilers use __cpu or __cpu__ so we test for both in those cases.
- */
-
-/*----------
- * Standard gcc asm format (assuming "volatile slock_t *lock"):
-
-	__asm__ __volatile__(
-		"	instruction	\n"
-		"	instruction	\n"
-		"	instruction	\n"
-:		"=r"(_res), "+m"(*lock)		// return register, in/out lock value
-:		"r"(lock)					// lock pointer, in input register
-:		"memory", "cc");			// show clobbered registers here
-
- * The output-operands list (after first colon) should always include
- * "+m"(*lock), whether or not the asm code actually refers to this
- * operand directly.  This ensures that gcc believes the value in the
- * lock variable is used and set by the asm code.  Also, the clobbers
- * list (after third colon) should always include "memory"; this prevents
- * gcc from thinking it can cache the values of shared-memory fields
- * across the asm code.  Add "cc" if your asm code changes the condition
- * code register, and also list any temp registers the code uses.
- *----------
- */
-
-
-#ifdef __i386__		/* 32-bit i386 */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	/*
-	 * Use a non-locking test before asserting the bus lock.  Note that the
-	 * extra test appears to be a small loss on some x86 platforms and a small
-	 * win on others; it's by no means clear that we should keep it.
-	 *
-	 * When this was last tested, we didn't have separate TAS() and TAS_SPIN()
-	 * macros.  Nowadays it probably would be better to do a non-locking test
-	 * in TAS_SPIN() but not in TAS(), like on x86_64, but no-one's done the
-	 * testing to verify that.  Without some empirical evidence, better to
-	 * leave it alone.
-	 */
-	__asm__ __volatile__(
-		"	cmpb	$0,%1	\n"
-		"	jne		1f		\n"
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-		"1: \n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * This sequence is equivalent to the PAUSE instruction ("rep" is
-	 * ignored by old IA32 processors if the following instruction is
-	 * not a string operation); the IA-32 Architecture Software
-	 * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
-	 * PAUSE in the inner loop of a spin lock is necessary for good
-	 * performance:
-	 *
-	 *     The PAUSE instruction improves the performance of IA-32
-	 *     processors supporting Hyper-Threading Technology when
-	 *     executing spin-wait loops and other routines where one
-	 *     thread is accessing a shared lock or semaphore in a tight
-	 *     polling loop. When executing a spin-wait loop, the
-	 *     processor can suffer a severe performance penalty when
-	 *     exiting the loop because it detects a possible memory order
-	 *     violation and flushes the core processor's pipeline. The
-	 *     PAUSE instruction provides a hint to the processor that the
-	 *     code sequence is a spin-wait loop. The processor uses this
-	 *     hint to avoid the memory order violation and prevent the
-	 *     pipeline flush. In addition, the PAUSE instruction
-	 *     de-pipelines the spin-wait loop to prevent it from
-	 *     consuming execution resources excessively.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __i386__ */
-
-
-#ifdef __x86_64__		/* AMD Opteron, Intel EM64T */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * On Intel EM64T, it's a win to use a non-locking test before the xchg proper,
- * but only when spinning.
- *
- * See also Implementing Scalable Atomic Locks for Multi-Core Intel(tm) EM64T
- * and IA32, by Michael Chynoweth and Mary R. Lee. As of this writing, it is
- * available at:
- * http://software.intel.com/en-us/articles/implementing-scalable-atomic-locks-for-multi-core-intel-em64t-and-ia32-architectures
- */
-#define TAS_SPIN(lock)    (*(lock) ? 1 : TAS(lock))
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res = 1;
-
-	__asm__ __volatile__(
-		"	lock			\n"
-		"	xchgb	%0,%1	\n"
-:		"+q"(_res), "+m"(*lock)
-:		/* no inputs */
-:		"memory", "cc");
-	return (int) _res;
-}
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	/*
-	 * Adding a PAUSE in the spin delay loop is demonstrably a no-op on
-	 * Opteron, but it may be of some use on EM64T, so we keep it.
-	 */
-	__asm__ __volatile__(
-		" rep; nop			\n");
-}
-
-#endif	 /* __x86_64__ */
-
-
-/*
- * On ARM and ARM64, we use __sync_lock_test_and_set(int *, int) if available.
- *
- * We use the int-width variant of the builtin because it works on more chips
- * than other widths.
- */
-#if defined(__arm__) || defined(__arm) || defined(__aarch64__)
-#ifdef HAVE_GCC__SYNC_INT32_TAS
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-/*
- * Using an ISB instruction to delay in spinlock loops appears beneficial on
- * high-core-count ARM64 processors.  It seems mostly a wash for smaller gear,
- * and ISB doesn't exist at all on pre-v7 ARM chips.
- */
-#if defined(__aarch64__)
-
-#define SPIN_DELAY() spin_delay()
-
-static __inline__ void
-spin_delay(void)
-{
-	__asm__ __volatile__(
-		" isb;				\n");
-}
-
-#endif	 /* __aarch64__ */
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-#endif	 /* __arm__ || __arm || __aarch64__ */
-
-
-/* S/390 and S/390x Linux (32- and 64-bit zSeries) */
-#if defined(__s390__) || defined(__s390x__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock)	   tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	int			_res = 0;
-
-	__asm__	__volatile__(
-		"	cs 	%0,%3,0(%2)		\n"
-:		"+d"(_res), "+m"(*lock)
-:		"a"(lock), "d"(1)
-:		"memory", "cc");
-	return _res;
-}
-
-#endif	 /* __s390__ || __s390x__ */
-
-
-#if defined(__sparc__)		/* Sparc */
-/*
- * Solaris has always run sparc processors in TSO (total store) mode, but
- * linux didn't use to and the *BSDs still don't. So, be careful about
- * acquire/release semantics. The CPU will treat superfluous members as
- * NOPs, so it's just code space.
- */
-#define HAS_TEST_AND_SET
-
-typedef unsigned char slock_t;
-
-#define TAS(lock) tas(lock)
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t		_res;
-
-	/*
-	 *	See comment in src/backend/port/tas/sunstudio_sparc.s for why this
-	 *	uses "ldstub", and that file uses "cas".  gcc currently generates
-	 *	sparcv7-targeted binaries, so "cas" use isn't possible.
-	 */
-	__asm__ __volatile__(
-		"	ldstub	[%2], %0	\n"
-:		"=r"(_res), "+m"(*lock)
-:		"r"(lock)
-:		"memory");
-#if defined(__sparcv7) || defined(__sparc_v7__)
-	/*
-	 * No stbar or membar available, luckily no actually produced hardware
-	 * requires a barrier.
-	 */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-	/* stbar is available (and required for both PSO, RMO), membar isn't */
-	__asm__ __volatile__ ("stbar	 \n":::"memory");
-#else
-	/*
-	 * #LoadStore (RMO) | #LoadLoad (RMO) together are the appropriate acquire
-	 * barrier for sparcv8+ upwards.
-	 */
-	__asm__ __volatile__ ("membar #LoadStore | #LoadLoad \n":::"memory");
-#endif
-	return (int) _res;
-}
-
-#if defined(__sparcv7) || defined(__sparc_v7__)
-/*
- * No stbar or membar available, luckily no actually produced hardware
- * requires a barrier.  We fall through to the default gcc definition of
- * S_UNLOCK in this case.
- */
-#elif defined(__sparcv8) || defined(__sparc_v8__)
-/* stbar is available (and required for both PSO, RMO), membar isn't */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("stbar	 \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#else
-/*
- * #LoadStore (RMO) | #StoreStore (RMO, PSO) together are the appropriate
- * release barrier for sparcv8+ upwards.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("membar #LoadStore | #StoreStore \n":::"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-#endif
-
-#endif	 /* __sparc__ */
-
-
-/* PowerPC */
-#if defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/* On PPC, it's a win to use a non-locking test before the lwarx */
-#define TAS_SPIN(lock)	(*(lock) ? 1 : TAS(lock))
-
-/*
- * The second operand of addi can hold a constant zero or a register number,
- * hence constraint "=&b" to avoid allocating r0.  "b" stands for "address
- * base register"; most operands having this register-or-zero property are
- * address bases, e.g. the second operand of lwax.
- *
- * NOTE: per the Enhanced PowerPC Architecture manual, v1.0 dated 7-May-2002,
- * an isync is a sufficient synchronization barrier after a lwarx/stwcx loop.
- * But if the spinlock is in ordinary memory, we can use lwsync instead for
- * better performance.
- */
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	slock_t _t;
-	int _res;
-
-	__asm__ __volatile__(
-"	lwarx   %0,0,%3,1	\n"
-"	cmpwi   %0,0		\n"
-"	bne     1f			\n"
-"	addi    %0,%0,1		\n"
-"	stwcx.  %0,0,%3		\n"
-"	beq     2f			\n"
-"1: \n"
-"	li      %1,1		\n"
-"	b       3f			\n"
-"2: \n"
-"	lwsync				\n"
-"	li      %1,0		\n"
-"3: \n"
-:	"=&b"(_t), "=r"(_res), "+m"(*lock)
-:	"r"(lock)
-:	"memory", "cc");
-	return _res;
-}
-
-/*
- * PowerPC S_UNLOCK is almost standard but requires a "sync" instruction.
- * But we can use lwsync instead for better performance.
- */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__ ("	lwsync \n" ::: "memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* powerpc */
-
-
-#if defined(__mips__) && !defined(__sgi)	/* non-SGI MIPS */
-#define HAS_TEST_AND_SET
-
-typedef unsigned int slock_t;
-
-#define TAS(lock) tas(lock)
-
-/*
- * Original MIPS-I processors lacked the LL/SC instructions, but if we are
- * so unfortunate as to be running on one of those, we expect that the kernel
- * will handle the illegal-instruction traps and emulate them for us.  On
- * anything newer (and really, MIPS-I is extinct) LL/SC is the only sane
- * choice because any other synchronization method must involve a kernel
- * call.  Unfortunately, many toolchains still default to MIPS-I as the
- * codegen target; if the symbol __mips shows that that's the case, we
- * have to force the assembler to accept LL/SC.
- *
- * R10000 and up processors require a separate SYNC, which has the same
- * issues as LL/SC.
- */
-#if __mips < 2
-#define MIPS_SET_MIPS2	"       .set mips2          \n"
-#else
-#define MIPS_SET_MIPS2
-#endif
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	volatile slock_t *_l = lock;
-	int			_res;
-	int			_tmp;
-
-	__asm__ __volatile__(
-		"       .set push           \n"
-		MIPS_SET_MIPS2
-		"       .set noreorder      \n"
-		"       .set nomacro        \n"
-		"       ll      %0, %2      \n"
-		"       or      %1, %0, 1   \n"
-		"       sc      %1, %2      \n"
-		"       xori    %1, 1       \n"
-		"       or      %0, %0, %1  \n"
-		"       sync                \n"
-		"       .set pop              "
-:		"=&r" (_res), "=&r" (_tmp), "+R" (*_l)
-:		/* no inputs */
-:		"memory");
-	return _res;
-}
-
-/* MIPS S_UNLOCK is almost standard but requires a "sync" instruction */
-#define S_UNLOCK(lock)	\
-do \
-{ \
-	__asm__ __volatile__( \
-		"       .set push           \n" \
-		MIPS_SET_MIPS2 \
-		"       .set noreorder      \n" \
-		"       .set nomacro        \n" \
-		"       sync                \n" \
-		"       .set pop              " \
-:		/* no outputs */ \
-:		/* no inputs */	\
-:		"memory"); \
-	*((volatile slock_t *) (lock)) = 0; \
-} while (0)
-
-#endif /* __mips__ && !__sgi */
-
-
-
-/*
- * If we have no platform-specific knowledge, but we found that the compiler
- * provides __sync_lock_test_and_set(), use that.  Prefer the int-width
- * version over the char-width version if we have both, on the rather dubious
- * grounds that that's known to be more likely to work in the ARM ecosystem.
- * (But we dealt with ARM above.)
- */
-#if !defined(HAS_TEST_AND_SET)
-
-#if defined(HAVE_GCC__SYNC_INT32_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef int slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#elif defined(HAVE_GCC__SYNC_CHAR_TAS)
-#define HAS_TEST_AND_SET
-
-#define TAS(lock) tas(lock)
-
-typedef char slock_t;
-
-static __inline__ int
-tas(volatile slock_t *lock)
-{
-	return __sync_lock_test_and_set(lock, 1);
-}
-
-#define S_UNLOCK(lock) __sync_lock_release(lock)
-
-#endif	 /* HAVE_GCC__SYNC_INT32_TAS */
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/*
- * Default implementation of S_UNLOCK() for gcc/icc.
- *
- * Note that this implementation is unsafe for any platform that can reorder
- * a memory access (either load or store) after a following store.  That
- * happens not to be possible on x86 and most legacy architectures (some are
- * single-processor!), but many modern systems have weaker memory ordering.
- * Those that do must define their own version of S_UNLOCK() rather than
- * relying on this one.
- */
-#if !defined(S_UNLOCK)
-#define S_UNLOCK(lock)	\
-	do { __asm__ __volatile__("" : : : "memory");  *(lock) = 0; } while (0)
-#endif
-
-#endif	/* defined(__GNUC__) || defined(__INTEL_COMPILER) */
-
-
-/*
- * ---------------------------------------------------------------------
- * Platforms that use non-gcc inline assembly:
- * ---------------------------------------------------------------------
- */
-
-#if !defined(HAS_TEST_AND_SET)	/* We didn't trigger above, let's try here */
-
-/* These are in sunstudio_(sparc|x86).s */
-
-#if defined(__SUNPRO_C) && (defined(__i386) || defined(__x86_64__) || defined(__sparc__) || defined(__sparc))
-#define HAS_TEST_AND_SET
-
-#if defined(__i386) || defined(__x86_64__) || defined(__sparcv9) || defined(__sparcv8plus)
-typedef unsigned int slock_t;
-#else
-typedef unsigned char slock_t;
-#endif
-
-extern slock_t pg_atomic_cas(volatile slock_t *lock, slock_t with,
-									  slock_t cmp);
-
-#define TAS(a) (pg_atomic_cas((a), 1, 0) != 0)
-#endif
-
-
-#ifdef _MSC_VER
-typedef LONG slock_t;
-
-#define HAS_TEST_AND_SET
-#define TAS(lock) (InterlockedCompareExchange(lock, 1, 0))
-
-#define SPIN_DELAY() spin_delay()
-
-/* If using Visual C++ on Win64, inline assembly is unavailable.
- * Use a _mm_pause intrinsic instead of rep nop.
- */
-#if defined(_WIN64)
-static __forceinline void
-spin_delay(void)
-{
-	_mm_pause();
-}
-#else
-static __forceinline void
-spin_delay(void)
-{
-	/* See comment for gcc code. Same code, MASM syntax */
-	__asm rep nop;
-}
-#endif
-
-#include <intrin.h>
-#pragma intrinsic(_ReadWriteBarrier)
-
-#define S_UNLOCK(lock)	\
-	do { _ReadWriteBarrier(); (*(lock)) = 0; } while (0)
-
-#endif
-
-
-#endif	/* !defined(HAS_TEST_AND_SET) */
-
-
-/* Blow up if we didn't have any way to do spinlocks */
-#ifndef HAS_TEST_AND_SET
-#error PostgreSQL does not have spinlock support on this platform.  Please report this to pgsql-b...@lists.postgresql.org.
-#endif
-
-
-/*
- * Default Definitions - override these above as needed.
- */
-
-#if !defined(S_LOCK)
-#define S_LOCK(lock) \
-	(TAS(lock) ? s_lock((lock), __FILE__, __LINE__, __func__) : 0)
-#endif	 /* S_LOCK */
-
-#if !defined(S_LOCK_FREE)
-#define S_LOCK_FREE(lock)	(*(lock) == 0)
-#endif	 /* S_LOCK_FREE */
-
-#if !defined(S_UNLOCK)
-/*
- * Our default implementation of S_UNLOCK is essentially *(lock) = 0.  This
- * is unsafe if the platform can reorder a memory access (either load or
- * store) after a following store; platforms where this is possible must
- * define their own S_UNLOCK.  But CPU reordering is not the only concern:
- * if we simply defined S_UNLOCK() as an inline macro, the compiler might
- * reorder instructions from inside the critical section to occur after the
- * lock release.  Since the compiler probably can't know what the external
- * function s_unlock is doing, putting the same logic there should be adequate.
- * A sufficiently-smart globally optimizing compiler could break that
- * assumption, though, and the cost of a function call for every spinlock
- * release may hurt performance significantly, so we use this implementation
- * only for platforms where we don't know of a suitable intrinsic.  For the
- * most part, those are relatively obscure platform/compiler combinations to
- * which the PostgreSQL project does not have access.
- */
-#define USE_DEFAULT_S_UNLOCK
-extern void s_unlock(volatile slock_t *lock);
-#define S_UNLOCK(lock)		s_unlock(lock)
-#endif	 /* S_UNLOCK */
-
-#if !defined(S_INIT_LOCK)
-#define S_INIT_LOCK(lock)	S_UNLOCK(lock)
-#endif	 /* S_INIT_LOCK */
-
-#if !defined(SPIN_DELAY)
-#define SPIN_DELAY()	((void) 0)
-#endif	 /* SPIN_DELAY */
-
-#if !defined(TAS)
-extern int	tas(volatile slock_t *lock);		/* in port/.../tas.s, or
-												 * s_lock.c */
-
-#define TAS(lock)		tas(lock)
-#endif	 /* TAS */
-
-#if !defined(TAS_SPIN)
-#define TAS_SPIN(lock)	TAS(lock)
-#endif	 /* TAS_SPIN */
-
-
-/*
- * Platform-independent out-of-line support routines
- */
-extern int s_lock(volatile slock_t *lock, const char *file, int line, const char *func);
-
-/* Support for dynamic adjustment of spins_per_delay */
-#define DEFAULT_SPINS_PER_DELAY  100
-
-extern void set_spins_per_delay(int shared_spins_per_delay);
-extern int	update_spins_per_delay(int shared_spins_per_delay);
-
-/*
- * Support for spin delay which is useful in various places where
- * spinlock-like procedures take place.
- */
-typedef struct
-{
-	int			spins;
-	int			delays;
-	int			cur_delay;
-	const char *file;
-	int			line;
-	const char *func;
-} SpinDelayStatus;
-
-static inline void
-init_spin_delay(SpinDelayStatus *status,
-				const char *file, int line, const char *func)
-{
-	status->spins = 0;
-	status->delays = 0;
-	status->cur_delay = 0;
-	status->file = file;
-	status->line = line;
-	status->func = func;
-}
-
-#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
-extern void perform_spin_delay(SpinDelayStatus *status);
-extern void finish_spin_delay(SpinDelayStatus *status);
-
-#endif	 /* S_LOCK_H */
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 3ae2a56d073..24edac4822d 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -14,9 +14,11 @@
  *		Acquire a spinlock, waiting if necessary.
  *		Time out and abort() if unable to acquire the lock in a
  *		"reasonable" amount of time --- typically ~ 1 minute.
+ *		Acquire (including read barrier) semantics.
  *
  *	void SpinLockRelease(volatile slock_t *lock)
  *		Unlock a previously acquired lock.
+ *		Release (including write barrier) semantics.
  *
  *	bool SpinLockFree(slock_t *lock)
  *		Tests if the lock is free. Returns true if free, false if locked.
@@ -35,11 +37,6 @@
  *	for a CHECK_FOR_INTERRUPTS() to occur while holding a spinlock, and so
  *	it is not necessary to do HOLD/RESUME_INTERRUPTS() in these macros.
  *
- *	These macros are implemented in terms of hardware-dependent macros
- *	supplied by s_lock.h.  There is not currently any extra functionality
- *	added by this header, but there has been in the past and may someday
- *	be again.
- *
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -51,15 +48,58 @@
 #ifndef SPIN_H
 #define SPIN_H
 
-#include "storage/s_lock.h"
+#ifdef FRONTEND
+#error "spin.h may not be included from frontend code"
+#endif
+
+#include "port/atomics.h"
+
+typedef pg_atomic_flag slock_t;
 
 
-#define SpinLockInit(lock)	S_INIT_LOCK(lock)
+/* Support for dynamic adjustment of spins_per_delay */
+#define DEFAULT_SPINS_PER_DELAY  100
+
+/*
+ * Support for spin delay which is useful in various places where
+ * spinlock-like procedures take place.
+ */
+typedef struct
+{
+	int			spins;
+	int			delays;
+	int			cur_delay;
+	const char *file;
+	int			line;
+	const char *func;
+} SpinDelayStatus;
+
+static inline void
+init_spin_delay(SpinDelayStatus *status,
+				const char *file, int line, const char *func)
+{
+	status->spins = 0;
+	status->delays = 0;
+	status->cur_delay = 0;
+	status->file = file;
+	status->line = line;
+	status->func = func;
+}
 
-#define SpinLockAcquire(lock) S_LOCK(lock)
+#define init_local_spin_delay(status) init_spin_delay(status, __FILE__, __LINE__, __func__)
+extern void perform_spin_delay(SpinDelayStatus *status);
+extern void finish_spin_delay(SpinDelayStatus *status);
+extern void set_spins_per_delay(int shared_spins_per_delay);
+extern int	update_spins_per_delay(int shared_spins_per_delay);
 
-#define SpinLockRelease(lock) S_UNLOCK(lock)
+/* Out-of-line part of spinlock acquisition. */
+extern int	s_lock(volatile slock_t *lock,
+				   const char *file, int line,
+				   const char *func);
 
-#define SpinLockFree(lock)	S_LOCK_FREE(lock)
+#define SpinLockInit(lock) pg_atomic_init_flag(lock)
+#define SpinLockAcquire(lock) (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
+#define SpinLockRelease(lock) pg_atomic_clear_flag(lock)
+#define SpinLockFree(lock) pg_atomic_unlocked_test_flag(lock)
 
 #endif							/* SPIN_H */
diff --git a/src/template/linux b/src/template/linux
index ec3302c4a22..2f04c1a6610 100644
--- a/src/template/linux
+++ b/src/template/linux
@@ -21,19 +21,4 @@ if test "$SUN_STUDIO_CC" = "yes" ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/template/solaris b/src/template/solaris
index f88b1cdad37..f5306b3dd5b 100644
--- a/src/template/solaris
+++ b/src/template/solaris
@@ -13,19 +13,4 @@ if test "$SUN_STUDIO_CC" = yes ; then
   if test "$enable_debug" != yes; then
     CFLAGS="$CFLAGS -O"		# any optimization breaks debug
   fi
-
-  # Pick the right test-and-set (TAS) code for the Sun compiler.
-  # We would like to use in-line assembler, but the compiler
-  # requires *.il files to be on every compile line, making
-  # the build system too fragile.
-  case $host_cpu in
-    sparc)
-	need_tas=yes
-	tas_file=sunstudio_sparc.s
-    ;;
-    i?86|x86_64)
-	need_tas=yes
-	tas_file=sunstudio_x86.s
-    ;;
-  esac
 fi
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 14aad5a0c6e..0cc0bbe3b40 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -851,32 +851,9 @@ test_spinlock(void)
 		SpinLockAcquire(&struct_w_lock.lock);
 		SpinLockRelease(&struct_w_lock.lock);
 
-		/* test basic operations via underlying S_* API */
-		S_INIT_LOCK(&struct_w_lock.lock);
-		S_LOCK(&struct_w_lock.lock);
-		S_UNLOCK(&struct_w_lock.lock);
-
 		/* and that "contended" acquisition works */
 		s_lock(&struct_w_lock.lock, "testfile", 17, "testfunc");
-		S_UNLOCK(&struct_w_lock.lock);
-
-		/*
-		 * Check, using TAS directly, that a single spin cycle doesn't block
-		 * when acquiring an already acquired lock.
-		 */
-#ifdef TAS
-		S_LOCK(&struct_w_lock.lock);
-
-		if (!TAS(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-
-#ifdef TAS_SPIN
-		if (!TAS_SPIN(&struct_w_lock.lock))
-			elog(ERROR, "acquired already held spinlock");
-#endif							/* defined(TAS_SPIN) */
-
-		S_UNLOCK(&struct_w_lock.lock);
-#endif							/* defined(TAS) */
+		SpinLockRelease(&struct_w_lock.lock);
 
 		/*
 		 * Verify that after all of this the non-lock contents are still
-- 
2.45.2

From e9f4795bcb0b85ef7d95bc16bb91b2b0ebeef131 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Thu, 1 Aug 2024 09:55:18 +1200
Subject: [PATCH v2 2/2] Add some assertions to spinlocks.

In assertion builds, use a magic value to check that the spinlock was
initialized, and also check that a lock was held when releasing.
---
 src/backend/storage/lmgr/s_lock.c |  4 +--
 src/include/storage/spin.h        | 46 ++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/lmgr/s_lock.c b/src/backend/storage/lmgr/s_lock.c
index 18a98b6e638..5785082a720 100644
--- a/src/backend/storage/lmgr/s_lock.c
+++ b/src/backend/storage/lmgr/s_lock.c
@@ -116,11 +116,11 @@ s_lock(volatile slock_t *lock, const char *file, int line, const char *func)
 		 * On these architectures, it is known to be more efficient to test
 		 * the lock with a relaxed load first, while spinning.
 		 */
-		probably_free = pg_atomic_unlocked_test_flag(lock);
+		probably_free = pg_atomic_unlocked_test_flag(&lock->flag);
 #endif
 
 		/* Try to get the lock. */
-		if (probably_free && pg_atomic_test_set_flag(lock))
+		if (probably_free && pg_atomic_test_set_flag(&lock->flag))
 			break;
 
 		perform_spin_delay(&delayStatus);
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 24edac4822d..e9bf1023954 100644
--- a/src/include/storage/spin.h
+++ b/src/include/storage/spin.h
@@ -54,12 +54,19 @@
 
 #include "port/atomics.h"
 
-typedef pg_atomic_flag slock_t;
-
-
 /* Support for dynamic adjustment of spins_per_delay */
 #define DEFAULT_SPINS_PER_DELAY  100
 
+#define SLOCK_T_MAGIC 0xc7f31f05
+
+typedef struct
+{
+	pg_atomic_flag flag;
+#ifdef USE_ASSERT_CHECKING
+	int			magic;
+#endif
+} slock_t;
+
 /*
  * Support for spin delay which is useful in various places where
  * spinlock-like procedures take place.
@@ -97,9 +104,34 @@ extern int	s_lock(volatile slock_t *lock,
 				   const char *file, int line,
 				   const char *func);
 
-#define SpinLockInit(lock) pg_atomic_init_flag(lock)
-#define SpinLockAcquire(lock) (pg_atomic_test_set_flag(lock) ? 0 : s_lock((lock), __FILE__, __LINE__, __func__))
-#define SpinLockRelease(lock) pg_atomic_clear_flag(lock)
-#define SpinLockFree(lock) pg_atomic_unlocked_test_flag(lock)
+static inline void
+SpinLockInit(volatile slock_t *lock)
+{
+#ifdef USE_ASSERT_CHECKING
+	/* Used to detect use of uninitialized spinlocks. */
+	lock->magic = SLOCK_T_MAGIC;
+#endif
+
+	pg_atomic_init_flag(&lock->flag);
+}
+
+#define SpinLockAcquire(lock) \
+	(AssertMacro((lock)->magic == SLOCK_T_MAGIC), \
+	 pg_atomic_test_set_flag(&(lock)->flag) ? 0 : \
+	 s_lock((lock), __FILE__, __LINE__, __func__))
+
+static inline void
+SpinLockRelease(volatile slock_t *lock)
+{
+	Assert(lock->magic == SLOCK_T_MAGIC);
+
+	/*
+	 * Use a relaxed load to see that it's currently held.  That's OK because
+	 * we expect the calling thread to be the one that set it.
+	 */
+	Assert(!pg_atomic_unlocked_test_flag(&(lock)->flag));
+
+	pg_atomic_clear_flag(&(lock)->flag);
+}
 
 #endif							/* SPIN_H */
-- 
2.45.2
