From 4e61a9a4c1ec2535c1ffc5988cf720a2107bbfbf Mon Sep 17 00:00:00 2001
From: Shveta Malik <shveta.malik@gmail.com>
Date: Thu, 4 Apr 2024 14:18:51 +0530
Subject: [PATCH v5 1/2] Correct sanity check to compare confirmed_lsn.

This patch attempts to fix a BF failure introduced by commit 2ec005b.

It is seen in BF failure logs, that during the first call of the
SQL function pg_sync_replication_slots(), restart_lsn of the synced
slot is advanced to a lsn which is > than remote slot's restart_lsn.
And when sync SQL function is executed the second time, without any
further change on primary, it hits the error:
  ERROR:  cannot synchronize local slot "lsub1_slot" LSN(0/3000060) to
remote slot's LSN(0/3000028) as synchronization would move it
backwards

Issue:
Issue is that we are having a wrong sanity check based on
'restart_lsn' in synchronize_one_slot():

if (remote_slot->restart_lsn < slot->data.restart_lsn)
    elog(ERROR, ...);

Prior to commit 2ec005b, this check was okay, as we did not expect
restart_lsn of the synced slot to be ahead of remote since we were
directly copying the lsns. But now since we use 'advance slot'
functionality to reach to a consistent state while syncing a slot,
there is a possibility that for a synced slot, while consuming
changes from remote's slot restart_lsn till confirmed_flush, slot
sync find running xacts record after reaching consistent point,
for which it serializes the snapshot and thus end up advancing
restart_lsn of standby ahead of remote-slot.

In order to fix this, corrected sanity check to compare confirmed_lsn
rather than restart_lsn.

Other changes are:
a)
Now we attempt sync in update_local_synced_slot() if one of confirmed_lsn,
restart_lsn, and catalog_xmin for remote slot is ahead of local slot, instead
of them just being inequal.

b)
A log has been added after LogicalSlotAdvanceAndCheckSnapState()
to report the case when the local and remote slots' confirmed-lsn
were not found to be the same after sync.
---
 src/backend/replication/logical/slotsync.c | 44 +++++++++++++++++-----
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 97440cb6bf..0d4735fbfa 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -175,9 +175,14 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
 	if (found_consistent_snapshot)
 		*found_consistent_snapshot = false;
 
-	if (remote_slot->confirmed_lsn != slot->data.confirmed_flush ||
-		remote_slot->restart_lsn != slot->data.restart_lsn ||
-		remote_slot->catalog_xmin != slot->data.catalog_xmin)
+	/*
+	 * Attempt to sync LSNs and xmins only if remote slot is ahead of local
+	 * slot.
+	 */
+	if (remote_slot->confirmed_lsn > slot->data.confirmed_flush ||
+		remote_slot->restart_lsn > slot->data.restart_lsn ||
+		TransactionIdFollows(remote_slot->catalog_xmin,
+							 slot->data.catalog_xmin))
 	{
 		/*
 		 * We can't directly copy the remote slot's LSN or xmin unless there
@@ -208,6 +213,15 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
 		{
 			LogicalSlotAdvanceAndCheckSnapState(remote_slot->confirmed_lsn,
 												found_consistent_snapshot);
+
+			/* Sanity check */
+			if (slot->data.confirmed_flush != remote_slot->confirmed_lsn)
+				ereport(ERROR,
+						errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
+										remote_slot->name),
+						errdetail_internal("Remote slot has LSN %X/%X but local slot has LSN %X/%X.",
+										   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
+										   LSN_FORMAT_ARGS(slot->data.confirmed_flush)));
 		}
 
 		ReplicationSlotsComputeRequiredXmin(false);
@@ -633,14 +647,24 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 			/*
 			 * Sanity check: As long as the invalidations are handled
 			 * appropriately as above, this should never happen.
+			 *
+			 * We do not intend to check restart_lsn here, as restart_lsn of
+			 * the synced slot could be ahead of the remote slot. Since we use
+			 * advance slot functionality to reach to a consistent state while
+			 * syncing a slot, there is a possibility that for a synced slot,
+			 * while consuming changes from remote's slot restart_lsn till
+			 * confirmed_flush, slot sync find running xacts record after
+			 * reaching consistent point, for which it serializes the snapshot
+			 * and thus end up advancing restart_lsn of standby ahead of
+			 * remote-slot.
 			 */
-			if (remote_slot->restart_lsn < slot->data.restart_lsn)
-				elog(ERROR,
-					 "cannot synchronize local slot \"%s\" LSN(%X/%X)"
-					 " to remote slot's LSN(%X/%X) as synchronization"
-					 " would move it backwards", remote_slot->name,
-					 LSN_FORMAT_ARGS(slot->data.restart_lsn),
-					 LSN_FORMAT_ARGS(remote_slot->restart_lsn));
+			if (remote_slot->confirmed_lsn < slot->data.confirmed_flush)
+				ereport(ERROR,
+						errmsg_internal("cannot synchronize local slot \"%s\"",
+										remote_slot->name),
+						errdetail_internal("Local slot's start streaming location LSN(%X/%X) is ahead of remote slot's LSN(%X/%X).",
+										   LSN_FORMAT_ARGS(slot->data.confirmed_flush),
+										   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
 
 			/* Make sure the slot changes persist across server restart */
 			if (update_local_synced_slot(remote_slot, remote_dbid, NULL))
-- 
2.30.0.windows.2

