From 6584480f4fa9ebc91ac81c06cd2aa3bc20b7af5e Mon Sep 17 00:00:00 2001
From: Zhijie Hou <houzj.fnst@fujitsu.com>
Date: Fri, 21 Nov 2025 13:18:13 +0800
Subject: [PATCH v6 2/3] Fix the race condition of updating slot minimum LSN

Previously, there was a race condition: if a backend created a new slot and
was initializing slot.restart_lsn during WAL reservation while another backend
concurrently invoked ReplicationSlotsComputeRequiredLSN(), the slot minimum
LSN could first be updated on behalf of the newly created slot, only to be
subsequently overwritten by the backend running
ReplicationSlotsComputeRequiredLSN() with a more recent LSN. This scenario
could lead to the premature removal of WAL reserved by the new slot during a
checkpoint, resulting in the newly created slot being invalidated.

This commit closes the race condition by acquiring ReplicationSlotControlLock
in exclusive mode when updating slot.restart_lsn during WAL reservation.
Additionally, the call to XLogSetReplicationSlotMinimumLSN() is placed under
the protection of ReplicationSlotControlLock. This serializes the update of
slot.restart_lsn and the computation of the minimum LSN in other backends,
ensuring that a more recent minimum LSN isn't computed while an older one is
still being reserved.
---
 src/backend/replication/slot.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index a75d59e96d7..027f23b4b43 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1237,6 +1237,12 @@ ReplicationSlotsComputeRequiredLSN(void)
 
 	Assert(ReplicationSlotCtl != NULL);
 
+	/*
+	 * Hold the ReplicationSlotControlLock until after updating the slot minimum
+	 * LSN value, to prevent overwriting the minimum LSN with a position more
+	 * recent than the WAL position reserved by another newly created slot (see
+	 * ReplicationSlotReserveWal and reserve_wal_for_local_slot).
+	 */
 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
 	for (i = 0; i < max_replication_slots; i++)
 	{
@@ -1282,9 +1288,10 @@ ReplicationSlotsComputeRequiredLSN(void)
 			 restart_lsn < min_required))
 			min_required = restart_lsn;
 	}
-	LWLockRelease(ReplicationSlotControlLock);
 
 	XLogSetReplicationSlotMinimumLSN(min_required);
+
+	LWLockRelease(ReplicationSlotControlLock);
 }
 
 /*
@@ -1614,10 +1621,22 @@ ReplicationSlotReserveWal(void)
 	else
 		restart_lsn = GetXLogInsertRecPtr();
 
+	/*
+	 * Hold the ReplicationSlotControlLock exclusive when updating the slot
+	 * restart_lsn, so no backend can compute and update the new value
+	 * concurrently. This prevents other backends from overwriting the
+	 * minimum LSN with a position more recent than the WAL position being
+	 * reserved, ensuring the WALs required by this slot are not prematurely
+	 * removed during checkpoint.
+	 */
+	LWLockAcquire(ReplicationSlotControlLock, LW_EXCLUSIVE);
+
 	SpinLockAcquire(&slot->mutex);
 	slot->data.restart_lsn = restart_lsn;
 	SpinLockRelease(&slot->mutex);
 
+	LWLockRelease(ReplicationSlotControlLock);
+
 	/* prevent WAL removal as fast as possible */
 	ReplicationSlotsComputeRequiredLSN();
 
-- 
2.51.1.windows.1

