From 176fb95d4b0850501a977f6af6ea8404b9dbf420 Mon Sep 17 00:00:00 2001
From: Zhijie Hou <houzj.fnst@fujitsu.com>
Date: Thu, 13 Nov 2025 10:04:30 +0800
Subject: [PATCH v1] Fix unexpected WAL removal

Currently, there is a race condition in which ReplicationSlotReserveWal selects a
WAL position to be reserved but has not yet updated restart_lsn, while the
checkpoint process might select a later WAL position as the minimum, causing
premature removal of WAL needed by a slot.

This commit fixes it by taking an exclusive lock on ReplicationSlotAllocationLock
when reserving WAL, serializing the minimum restart_lsn computation in the
checkpoint process with WAL reservation, ensuring that:

1) If the WAL reservation occurs first, the checkpoint must wait for the
restart_lsn to be updated before proceeding with WAL removal. This guarantees
that the most recent restart_lsn position is detected.

2) If the checkpoint calls CheckPointReplicationSlots() first, then any
subsequent WAL reservation must take a position later than the redo pointer.
---
 src/backend/replication/logical/slotsync.c |  9 +++++++++
 src/backend/replication/slot.c             | 13 +++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 8b4afd87dc9..e4d5ac066b4 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -495,6 +495,13 @@ reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
 	Assert(slot != NULL);
 	Assert(!XLogRecPtrIsValid(slot->data.restart_lsn));
 
+	/*
+	 * Acquire an exclusive lock to prevent race conditions between WAL
+	 * reservation and minimum restart_lsn computation during checkpoints. See
+	 * comments in ReplicationSlotReserveWal() for more details.
+	 */
+	LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
+
 	while (true)
 	{
 		SpinLockAcquire(&slot->mutex);
@@ -546,6 +553,8 @@ reserve_wal_for_local_slot(XLogRecPtr restart_lsn)
 		/* Retry using the location of the oldest wal segment */
 		XLogSegNoOffsetToRecPtr(oldest_segno, 0, wal_segment_size, restart_lsn);
 	}
+
+	LWLockRelease(ReplicationSlotAllocationLock);
 }
 
 /*
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 1ec1e997b27..b617c6e229b 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -1576,6 +1576,17 @@ ReplicationSlotReserveWal(void)
 	Assert(!XLogRecPtrIsValid(slot->data.restart_lsn));
 	Assert(!XLogRecPtrIsValid(slot->last_saved_restart_lsn));
 
+	/*
+	 * Acquire an exclusive lock to prevent the checkpoint process from
+	 * concurrently calculating the minimum restart_lsn (refer to
+	 * CheckPointReplicationSlots). This ensures that no race conditions occur
+	 * where this function selects a WAL position but has not yet updated
+	 * restart_lsn, while the checkpoint process might select a later WAL
+	 * position as the minimum, causing premature removal of WAL needed by this
+	 * slot.
+	 */
+	LWLockAcquire(ReplicationSlotAllocationLock, LW_EXCLUSIVE);
+
 	/*
 	 * The replication slot mechanism is used to prevent removal of required
 	 * WAL. As there is no interlock between this routine and checkpoints, WAL
@@ -1629,6 +1640,8 @@ ReplicationSlotReserveWal(void)
 			break;
 	}
 
+	LWLockRelease(ReplicationSlotAllocationLock);
+
 	if (!RecoveryInProgress() && SlotIsLogical(slot))
 	{
 		XLogRecPtr	flushptr;
-- 
2.51.1.windows.1

