On Tue, Oct 20, 2020 at 1:22 PM Dilip Kumar <dilipbal...@gmail.com> wrote:
>
> On Tue, Oct 20, 2020 at 1:11 PM Simon Riggs <si...@2ndquadrant.com> wrote:
> >
> > On Mon, 19 Oct 2020 at 15:11, Dilip Kumar <dilipbal...@gmail.com> wrote:
> >
> > > We have an interface to pause the WAL replay (pg_wal_replay_pause) and
> > > to know whether the WAL replay pause is requested
> > > (pg_is_wal_replay_paused).  But there is no way to know whether the
> > > recovery is actually paused or not.  Actually, the recovery process
> > > might process an extra WAL before pausing the recovery.  So does it
> > > make sense to provide a new interface to tell whether the recovery is
> > > actually paused or not?
> > >
> > > One solution could be that we convert the XLogCtlData->recoveryPause
> > > from bool to tri-state variable (0-> recovery not paused 1-> pause
> > > requested 2-> actually paused).
> > >
> > > Any opinion on this?
> >
> > Why would we want this? What problem are you trying to solve?
>
> The requirement is to know the last replayed WAL on the standby so
> unless we can guarantee that the recovery is actually paused we can
> never get the safe last_replay_lsn value.
>
> > If we do care, why not fix pg_is_wal_replay_paused() so it responds as you 
> > wish?
>
> Maybe we can also do that but pg_is_wal_replay_paused is an existing
> API and the behavior is to know whether the recovery paused is
> requested or not,  So I am not sure is it a good idea to change the
> behavior of the existing API?
>

Attached is the POC patch to show what I have in mind.

-- 
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com
From 7aeb50bb94bdf031408a54b517d5288d687462a2 Mon Sep 17 00:00:00 2001
From: Dilip Kumar <dilipkumar@localhost.localdomain>
Date: Tue, 20 Oct 2020 14:10:14 +0530
Subject: [PATCH] New interface to know wal recovery actually paused

---
 src/backend/access/transam/xlog.c      | 42 +++++++++++++++++++++++++++++-----
 src/backend/access/transam/xlogfuncs.c | 28 ++++++++++++++++++++---
 src/include/access/xlog.h              | 11 ++++++++-
 src/include/catalog/pg_proc.dat        |  4 ++++
 4 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 52a67b1..4c50eb9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -714,8 +714,8 @@ typedef struct XLogCtlData
 	 * only relevant for replication or archive recovery
 	 */
 	TimestampTz currentChunkStartTime;
-	/* Are we requested to pause recovery? */
-	bool		recoveryPause;
+	/* Recovery pause state */
+	RecoveryPauseState		recoveryPause;
 
 	/*
 	 * lastFpwDisableRecPtr points to the start of the last replayed
@@ -5994,6 +5994,16 @@ recoveryStopsAfter(XLogReaderState *record)
 	return false;
 }
 
+static void
+CheckAndSetRecoveryPause(void)
+{
+	/* If recovery pause is requested then set it paused */
+	SpinLockAcquire(&XLogCtl->info_lck);
+	if (XLogCtl->recoveryPause == RECOVERY_PAUSE_REQUESTED)
+		XLogCtl->recoveryPause = RECOVERY_PAUSED;
+	SpinLockRelease(&XLogCtl->info_lck);
+}
+
 /*
  * Wait until shared recoveryPause flag is cleared.
  *
@@ -6025,12 +6035,20 @@ recoveryPausesHere(bool endOfRecovery)
 				(errmsg("recovery has paused"),
 				 errhint("Execute pg_wal_replay_resume() to continue.")));
 
-	while (RecoveryIsPaused())
+	while (RecoveryPauseRequested())
 	{
+
 		HandleStartupProcInterrupts();
+
 		if (CheckForStandbyTrigger())
 			return;
 		pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE);
+
+		/*
+		 * While we are in the loop, user might resume and pause again so set
+		 * this everytime.
+		 */
+		CheckAndSetRecoveryPause();
 		pg_usleep(1000000L);	/* 1000 ms */
 		pgstat_report_wait_end();
 	}
@@ -6039,17 +6057,29 @@ recoveryPausesHere(bool endOfRecovery)
 bool
 RecoveryIsPaused(void)
 {
+	bool	recoveryPause;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	recoveryPause = (XLogCtl->recoveryPause == RECOVERY_PAUSED) ? true : false;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return recoveryPause;
+}
+
+bool
+RecoveryPauseRequested(void)
+{
 	bool		recoveryPause;
 
 	SpinLockAcquire(&XLogCtl->info_lck);
-	recoveryPause = XLogCtl->recoveryPause;
+	recoveryPause = (XLogCtl->recoveryPause != RECOVERY_IN_PROGRESS) ? true : false;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	return recoveryPause;
 }
 
 void
-SetRecoveryPause(bool recoveryPause)
+SetRecoveryPause(RecoveryPauseState recoveryPause)
 {
 	SpinLockAcquire(&XLogCtl->info_lck);
 	XLogCtl->recoveryPause = recoveryPause;
@@ -7423,7 +7453,7 @@ StartupXLOG(void)
 						proc_exit(3);
 
 					case RECOVERY_TARGET_ACTION_PAUSE:
-						SetRecoveryPause(true);
+						SetRecoveryPause(RECOVERY_PAUSE_REQUESTED);
 						recoveryPausesHere(true);
 
 						/* drop into promote */
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 290658b..a01f146 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -538,7 +538,7 @@ pg_wal_replay_pause(PG_FUNCTION_ARGS)
 				 errhint("%s cannot be executed after promotion is triggered.",
 						 "pg_wal_replay_pause()")));
 
-	SetRecoveryPause(true);
+	SetRecoveryPause(RECOVERY_PAUSE_REQUESTED);
 
 	PG_RETURN_VOID();
 }
@@ -565,7 +565,7 @@ pg_wal_replay_resume(PG_FUNCTION_ARGS)
 				 errhint("%s cannot be executed after promotion is triggered.",
 						 "pg_wal_replay_resume()")));
 
-	SetRecoveryPause(false);
+	SetRecoveryPause(RECOVERY_IN_PROGRESS);
 
 	PG_RETURN_VOID();
 }
@@ -582,7 +582,29 @@ pg_is_wal_replay_paused(PG_FUNCTION_ARGS)
 				 errmsg("recovery is not in progress"),
 				 errhint("Recovery control functions can only be executed during recovery.")));
 
-	PG_RETURN_BOOL(RecoveryIsPaused());
+	PG_RETURN_BOOL(RecoveryPauseRequested());
+}
+
+/*
+ * pg_is_wal_replay_actually_paused
+ * FIXME: Better name for this interface??
+ */
+Datum
+pg_is_wal_replay_actually_paused(PG_FUNCTION_ARGS)
+{
+{
+	bool recovery_paused;
+
+	if (!RecoveryInProgress())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is not in progress"),
+				 errhint("Recovery control functions can only be executed during recovery.")));
+
+	recovery_paused = RecoveryIsPaused();
+
+	PG_RETURN_BOOL(recovery_paused);
+}
 }
 
 /*
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 221af87..cd7904e 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -174,6 +174,14 @@ typedef enum RecoveryState
 	RECOVERY_STATE_DONE			/* currently in production */
 } RecoveryState;
 
+/* Recovery pause states */
+typedef enum RecoveryPauseState
+{
+	RECOVERY_IN_PROGRESS = 0,
+	RECOVERY_PAUSE_REQUESTED,
+	RECOVERY_PAUSED,
+} RecoveryPauseState;
+
 extern PGDLLIMPORT int wal_level;
 
 /* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -311,7 +319,8 @@ extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
 extern XLogRecPtr GetXLogInsertRecPtr(void);
 extern XLogRecPtr GetXLogWriteRecPtr(void);
 extern bool RecoveryIsPaused(void);
-extern void SetRecoveryPause(bool recoveryPause);
+extern bool RecoveryPauseRequested(void);
+extern void SetRecoveryPause(RecoveryPauseState recoveryPause);
 extern TimestampTz GetLatestXTime(void);
 extern TimestampTz GetCurrentChunkReplayStartTime(void);
 
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index bbcac69..a43318b 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -6173,6 +6173,10 @@
   proname => 'pg_is_wal_replay_paused', provolatile => 'v',
   prorettype => 'bool', proargtypes => '',
   prosrc => 'pg_is_wal_replay_paused' },
+{ oid => '3585', descr => 'true if wal replay is paused',
+  proname => 'pg_is_wal_replay_actually_paused', provolatile => 'v',
+  prorettype => 'bool', proargtypes => '',
+  prosrc => 'pg_is_wal_replay_actually_paused' },
 
 { oid => '2621', descr => 'reload configuration files',
   proname => 'pg_reload_conf', provolatile => 'v', prorettype => 'bool',
-- 
1.8.3.1

Reply via email to