On Tue, Oct 20, 2020 at 1:22 PM Dilip Kumar <dilipbal...@gmail.com> wrote: > > On Tue, Oct 20, 2020 at 1:11 PM Simon Riggs <si...@2ndquadrant.com> wrote: > > > > On Mon, 19 Oct 2020 at 15:11, Dilip Kumar <dilipbal...@gmail.com> wrote: > > > > > We have an interface to pause the WAL replay (pg_wal_replay_pause) and > > > to know whether the WAL replay pause is requested > > > (pg_is_wal_replay_paused). But there is no way to know whether the > > > recovery is actually paused or not. Actually, the recovery process > > > might process an extra WAL before pausing the recovery. So does it > > > make sense to provide a new interface to tell whether the recovery is > > > actually paused or not? > > > > > > One solution could be that we convert the XLogCtlData->recoveryPause > > > from bool to tri-state variable (0-> recovery not paused 1-> pause > > > requested 2-> actually paused). > > > > > > Any opinion on this? > > > > Why would we want this? What problem are you trying to solve? > > The requirement is to know the last replayed WAL on the standby so > unless we can guarantee that the recovery is actually paused we can > never get the safe last_replay_lsn value. > > > If we do care, why not fix pg_is_wal_replay_paused() so it responds as you > > wish? > > Maybe we can also do that but pg_is_wal_replay_paused is an existing > API and the behavior is to know whether the recovery paused is > requested or not, So I am not sure is it a good idea to change the > behavior of the existing API? >
Attached is the POC patch to show what I have in mind. -- Regards, Dilip Kumar EnterpriseDB: http://www.enterprisedb.com
From 7aeb50bb94bdf031408a54b517d5288d687462a2 Mon Sep 17 00:00:00 2001 From: Dilip Kumar <dilipkumar@localhost.localdomain> Date: Tue, 20 Oct 2020 14:10:14 +0530 Subject: [PATCH] New interface to know wal recovery actually paused --- src/backend/access/transam/xlog.c | 42 +++++++++++++++++++++++++++++----- src/backend/access/transam/xlogfuncs.c | 28 ++++++++++++++++++++--- src/include/access/xlog.h | 11 ++++++++- src/include/catalog/pg_proc.dat | 4 ++++ 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 52a67b1..4c50eb9 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -714,8 +714,8 @@ typedef struct XLogCtlData * only relevant for replication or archive recovery */ TimestampTz currentChunkStartTime; - /* Are we requested to pause recovery? */ - bool recoveryPause; + /* Recovery pause state */ + RecoveryPauseState recoveryPause; /* * lastFpwDisableRecPtr points to the start of the last replayed @@ -5994,6 +5994,16 @@ recoveryStopsAfter(XLogReaderState *record) return false; } +static void +CheckAndSetRecoveryPause(void) +{ + /* If recovery pause is requested then set it paused */ + SpinLockAcquire(&XLogCtl->info_lck); + if (XLogCtl->recoveryPause == RECOVERY_PAUSE_REQUESTED) + XLogCtl->recoveryPause = RECOVERY_PAUSED; + SpinLockRelease(&XLogCtl->info_lck); +} + /* * Wait until shared recoveryPause flag is cleared. * @@ -6025,12 +6035,20 @@ recoveryPausesHere(bool endOfRecovery) (errmsg("recovery has paused"), errhint("Execute pg_wal_replay_resume() to continue."))); - while (RecoveryIsPaused()) + while (RecoveryPauseRequested()) { + HandleStartupProcInterrupts(); + if (CheckForStandbyTrigger()) return; pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE); + + /* + * While we are in the loop, user might resume and pause again so set + * this everytime. + */ + CheckAndSetRecoveryPause(); pg_usleep(1000000L); /* 1000 ms */ pgstat_report_wait_end(); } @@ -6039,17 +6057,29 @@ recoveryPausesHere(bool endOfRecovery) bool RecoveryIsPaused(void) { + bool recoveryPause; + + SpinLockAcquire(&XLogCtl->info_lck); + recoveryPause = (XLogCtl->recoveryPause == RECOVERY_PAUSED) ? true : false; + SpinLockRelease(&XLogCtl->info_lck); + + return recoveryPause; +} + +bool +RecoveryPauseRequested(void) +{ bool recoveryPause; SpinLockAcquire(&XLogCtl->info_lck); - recoveryPause = XLogCtl->recoveryPause; + recoveryPause = (XLogCtl->recoveryPause != RECOVERY_IN_PROGRESS) ? true : false; SpinLockRelease(&XLogCtl->info_lck); return recoveryPause; } void -SetRecoveryPause(bool recoveryPause) +SetRecoveryPause(RecoveryPauseState recoveryPause) { SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->recoveryPause = recoveryPause; @@ -7423,7 +7453,7 @@ StartupXLOG(void) proc_exit(3); case RECOVERY_TARGET_ACTION_PAUSE: - SetRecoveryPause(true); + SetRecoveryPause(RECOVERY_PAUSE_REQUESTED); recoveryPausesHere(true); /* drop into promote */ diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index 290658b..a01f146 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -538,7 +538,7 @@ pg_wal_replay_pause(PG_FUNCTION_ARGS) errhint("%s cannot be executed after promotion is triggered.", "pg_wal_replay_pause()"))); - SetRecoveryPause(true); + SetRecoveryPause(RECOVERY_PAUSE_REQUESTED); PG_RETURN_VOID(); } @@ -565,7 +565,7 @@ pg_wal_replay_resume(PG_FUNCTION_ARGS) errhint("%s cannot be executed after promotion is triggered.", "pg_wal_replay_resume()"))); - SetRecoveryPause(false); + SetRecoveryPause(RECOVERY_IN_PROGRESS); PG_RETURN_VOID(); } @@ -582,7 +582,29 @@ pg_is_wal_replay_paused(PG_FUNCTION_ARGS) errmsg("recovery is not in progress"), errhint("Recovery control functions can only be executed during recovery."))); - PG_RETURN_BOOL(RecoveryIsPaused()); + PG_RETURN_BOOL(RecoveryPauseRequested()); +} + +/* + * pg_is_wal_replay_actually_paused + * FIXME: Better name for this interface?? + */ +Datum +pg_is_wal_replay_actually_paused(PG_FUNCTION_ARGS) +{ +{ + bool recovery_paused; + + if (!RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is not in progress"), + errhint("Recovery control functions can only be executed during recovery."))); + + recovery_paused = RecoveryIsPaused(); + + PG_RETURN_BOOL(recovery_paused); +} } /* diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 221af87..cd7904e 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -174,6 +174,14 @@ typedef enum RecoveryState RECOVERY_STATE_DONE /* currently in production */ } RecoveryState; +/* Recovery pause states */ +typedef enum RecoveryPauseState +{ + RECOVERY_IN_PROGRESS = 0, + RECOVERY_PAUSE_REQUESTED, + RECOVERY_PAUSED, +} RecoveryPauseState; + extern PGDLLIMPORT int wal_level; /* Is WAL archiving enabled (always or only while server is running normally)? */ @@ -311,7 +319,8 @@ extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); extern XLogRecPtr GetXLogInsertRecPtr(void); extern XLogRecPtr GetXLogWriteRecPtr(void); extern bool RecoveryIsPaused(void); -extern void SetRecoveryPause(bool recoveryPause); +extern bool RecoveryPauseRequested(void); +extern void SetRecoveryPause(RecoveryPauseState recoveryPause); extern TimestampTz GetLatestXTime(void); extern TimestampTz GetCurrentChunkReplayStartTime(void); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index bbcac69..a43318b 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -6173,6 +6173,10 @@ proname => 'pg_is_wal_replay_paused', provolatile => 'v', prorettype => 'bool', proargtypes => '', prosrc => 'pg_is_wal_replay_paused' }, +{ oid => '3585', descr => 'true if wal replay is paused', + proname => 'pg_is_wal_replay_actually_paused', provolatile => 'v', + prorettype => 'bool', proargtypes => '', + prosrc => 'pg_is_wal_replay_actually_paused' }, { oid => '2621', descr => 'reload configuration files', proname => 'pg_reload_conf', provolatile => 'v', prorettype => 'bool', -- 1.8.3.1