From 1e84803cd111734538dcc630c6f96e23c762d655 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 6 Aug 2024 03:52:39 +0300
Subject: [PATCH v1 1/3] Adjust pg_wal_replay_wait() procedure behavior on
 promoted standby

pg_wal_replay_wait() is intended to be called on a standby.  However, a
standby can be promoted to primary at any moment, even concurrently with
the pg_wal_replay_wait() call.  The fact that recovery is no longer in
progress does not by itself mean the wait was unsuccessful: the target LSN
may already have been replayed before the promotion.  Thus, we always need
to recheck whether the target LSN has been replayed before reporting an
error.

Reported-by: Kevin Hale Boyes
Discussion: https://postgr.es/m/CAPpHfdu5QN%2BZGACS%2B7foxmr8_nekgA2PA%2B-G3BuOUrdBLBFb6Q%40mail.gmail.com
Author: Alexander Korotkov
---
 doc/src/sgml/func.sgml                     |  9 +++++
 src/backend/commands/waitlsn.c             | 41 +++++++++++++++++-----
 src/test/recovery/t/043_wal_replay_wait.pl | 15 ++++++--
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 0f7154b76ab..968a9985527 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -28969,6 +28969,15 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
     connection pooler side.
    </para>
 
+   <para>
+    <function>pg_wal_replay_wait</function> should be called on a standby.
+    If a user calls <function>pg_wal_replay_wait</function> on a primary, it
+    will error out.  However, if <function>pg_wal_replay_wait</function> is
+    called on a primary promoted from a standby and
+    <literal>target_lsn</literal> was already replayed, then
+    <function>pg_wal_replay_wait</function> just exits immediately.
+   </para>
+
    <para>
     You can use <function>pg_wal_replay_wait</function> to wait for
     the <type>pg_lsn</type> value.  For example, an application could update
diff --git a/src/backend/commands/waitlsn.c b/src/backend/commands/waitlsn.c
index 3170f0792a5..6651801dfb4 100644
--- a/src/backend/commands/waitlsn.c
+++ b/src/backend/commands/waitlsn.c
@@ -230,14 +230,27 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
 	Assert(MyProcNumber >= 0 && MyProcNumber < MaxBackends);
 
 	if (!RecoveryInProgress())
+	{
+		/*
+		 * Recovery is not in progress.  Given that we detected this in the
+		 * very first check, this procedure was probably called on a primary by
+		 * mistake.  However, the standby may have been promoted concurrently
+		 * with the call, after the target LSN was replayed.  So, we still
+		 * check the last replay LSN before reporting an error.
+		 */
+		if (targetLSN <= GetXLogReplayRecPtr(NULL))
+			return;
 		ereport(ERROR,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("recovery is not in progress"),
 				 errhint("Waiting for LSN can only be executed during recovery.")));
-
-	/* If target LSN is already replayed, exit immediately */
-	if (targetLSN <= GetXLogReplayRecPtr(NULL))
-		return;
+	}
+	else
+	{
+		/* If target LSN is already replayed, exit immediately */
+		if (targetLSN <= GetXLogReplayRecPtr(NULL))
+			return;
+	}
 
 	if (timeout > 0)
 	{
@@ -257,19 +270,29 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
 		int			rc;
 		long		delay_ms = 0;
 
-		/* Check if the waited LSN has been replayed */
-		currentLSN = GetXLogReplayRecPtr(NULL);
-		if (targetLSN <= currentLSN)
-			break;
-
 		/* Recheck that recovery is still in-progress */
 		if (!RecoveryInProgress())
+		{
+			/*
+			 * Recovery ended; re-read replay LSN for recheck and errdetail.
+			 */
+			currentLSN = GetXLogReplayRecPtr(NULL);
+			if (targetLSN <= currentLSN)
+				return;
 			ereport(ERROR,
 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 					 errmsg("recovery is not in progress"),
 					 errdetail("Recovery ended before replaying target LSN %X/%X; last replay LSN %X/%X.",
 							   LSN_FORMAT_ARGS(targetLSN),
 							   LSN_FORMAT_ARGS(currentLSN))));
+		}
+		else
+		{
+			/* Check if the waited LSN has been replayed */
+			currentLSN = GetXLogReplayRecPtr(NULL);
+			if (targetLSN <= currentLSN)
+				break;
+		}
 
 		/*
 		 * If the timeout value is specified, calculate the number of
diff --git a/src/test/recovery/t/043_wal_replay_wait.pl b/src/test/recovery/t/043_wal_replay_wait.pl
index e4842730b05..c3131acb75a 100644
--- a/src/test/recovery/t/043_wal_replay_wait.pl
+++ b/src/test/recovery/t/043_wal_replay_wait.pl
@@ -126,12 +126,18 @@ ok(1, 'multiple LSN waiters reported consistent data');
 
 # 5. Check that the standby promotion terminates the wait on LSN.  Start
 # waiting for an unreachable LSN then promote.  Check the log for the relevant
-# error message.
+# error message.  Also, check that waiting for already replayed LSN doesn't
+# cause an error even after promotion.
+my $lsn4 =
+  $node_primary->safe_psql('postgres',
+	"SELECT pg_current_wal_insert_lsn() + 10000000000");
+my $lsn5 =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_insert_lsn()");
 my $psql_session = $node_standby1->background_psql('postgres');
 $psql_session->query_until(
 	qr/start/, qq[
 	\\echo start
-	CALL pg_wal_replay_wait('${lsn3}');
+	CALL pg_wal_replay_wait('${lsn4}');
 ]);
 
 $log_offset = -s $node_standby1->logfile;
@@ -140,6 +146,11 @@ $node_standby1->wait_for_log('recovery is not in progress', $log_offset);
 
 ok(1, 'got error after standby promote');
 
+$node_standby1->safe_psql('postgres', "CALL pg_wal_replay_wait('${lsn5}');");
+
+ok(1,
+	'wait for already replayed LSN exits immediately even after promotion');
+
 $node_standby1->stop;
 $node_primary->stop;
 
-- 
2.39.3 (Apple Git-145)

