On Wed, Oct 19, 2022 at 6:30 PM Alvaro Herrera <alvhe...@alvh.no-ip.org> wrote:
>
> 0001 seems mostly OK, but I don't like some of these new function names.
> I see you've named them so that they are case-consistent with the name
> of the struct member that they affect, but I don't think that's a good
> criterion.  I propose
>
> SetrunningBackups -> XLogBackupSetRunning()
> ResetXLogBackupActivity -> XLogBackupNotRunning()
> // or maybe SetNotRunning, or ResetRunning?  I prefer the one above
> SetlastBackupStart -> XLogBackupSetLastStart()
>
> GetlastFpwDisableRecPtr -> XLogGetLastFPWDisableRecptr()
> GetminRecoveryPoint -> XLogGetMinRecoveryPoint()

XLogBackupResetRunning() seemed better. +1 for above function names.

> I wouldn't say in the xlog_internal.h comment that these new functions
> are for xlogbackup.c to use.  The API definition doesn't have to concern
> itself with that.  Maybe one day xlogrecovery.c or some other xlog*.c
> would like to call those functions, and then the comment becomes a lie;
> and what for?

Removed.

> 0002 is where the interesting stuff happens.  I have not reviewed that
> part with any care, but it appears that set_backup_state is pretty much
> useless.  Let's get rid of it instead of moving it.  Which also means
> that we shouldn't introduce reset_backup_status in 0001, I suppose.
> I think xlogfuncs.c is content with having just get_backup_status().

There's no set_backup_state() at all. We need get_backup_status() for
xlogfuncs.c and basebackup.c and we need reset_backup_status() for
XLogBackupResetRunning() sitting in xlog.c.

> Speaking of which -- I'm not sure we really want to do 0003.
> xlogfuncs.c is not a big file, the functions are not complex, and there
> are no interesting interactions in those functions with the internals
> (other than get_backup_status).  I see that Michael advised the same.
> I propose we keep those functions where they are.

I'm okay either way.

Please see the attached v8 patch set.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From ac3915af151f1444bcb04be6c366cc63ffe9e661 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 19 Oct 2022 14:56:39 +0000
Subject: [PATCH v8] Add functions for xlogbackup.c to call back into xlog.c

---
 src/backend/access/transam/xlog.c  | 175 ++++++++++++++++++++---------
 src/include/access/xlog.h          |   1 +
 src/include/access/xlog_internal.h |   8 ++
 3 files changed, 131 insertions(+), 53 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index dea978a962..8475dfe5c1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8311,9 +8311,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
 	 * runningBackups, to ensure adequate interlocking against
 	 * XLogInsertRecord().
 	 */
-	WALInsertLockAcquireExclusive();
-	XLogCtl->Insert.runningBackups++;
-	WALInsertLockRelease();
+	XLogBackupSetRunning();
 
 	/*
 	 * Ensure we decrement runningBackups if we fail below. NB -- for this to
@@ -8383,12 +8381,8 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
 			 * to restore starting from the checkpoint is precisely the REDO
 			 * pointer.
 			 */
-			LWLockAcquire(ControlFileLock, LW_SHARED);
-			state->checkpointloc = ControlFile->checkPoint;
-			state->startpoint = ControlFile->checkPointCopy.redo;
-			state->starttli = ControlFile->checkPointCopy.ThisTimeLineID;
-			checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
-			LWLockRelease(ControlFileLock);
+			GetCheckpointLocation(&state->checkpointloc, &state->startpoint,
+								  &state->starttli, &checkpointfpw);
 
 			if (backup_started_in_recovery)
 			{
@@ -8399,9 +8393,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
 				 * (i.e., since last restartpoint used as backup starting
 				 * checkpoint) contain full-page writes.
 				 */
-				SpinLockAcquire(&XLogCtl->info_lck);
-				recptr = XLogCtl->lastFpwDisableRecPtr;
-				SpinLockRelease(&XLogCtl->info_lck);
+				recptr = XLogGetLastFPWDisableRecptr();
 
 				if (!checkpointfpw || state->startpoint <= recptr)
 					ereport(ERROR,
@@ -8434,13 +8426,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
 			 * taking a checkpoint right after another is not that expensive
 			 * either because only few buffers have been dirtied yet.
 			 */
-			WALInsertLockAcquireExclusive();
-			if (XLogCtl->Insert.lastBackupStart < state->startpoint)
-			{
-				XLogCtl->Insert.lastBackupStart = state->startpoint;
-				gotUniqueStartpoint = true;
-			}
-			WALInsertLockRelease();
+			gotUniqueStartpoint = XLogBackupSetLastStart(state->startpoint);
 		} while (!gotUniqueStartpoint);
 
 		/*
@@ -8549,6 +8535,15 @@ get_backup_status(void)
 	return sessionBackupState;
 }
 
+/*
+ * Utility routine to reset the session-level status of a backup running.
+ */
+void
+reset_backup_status(void)
+{
+	sessionBackupState = SESSION_BACKUP_NONE;
+}
+
 /*
  * do_pg_backup_stop
  *
@@ -8590,33 +8585,16 @@ do_pg_backup_stop(BackupState *state, bool waitforarchive)
 				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
 
 	/*
-	 * OK to update backup counter and session-level lock.
+	 * OK to reset backup counter and session-level lock.
 	 *
 	 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
 	 * otherwise they can be updated inconsistently, which might cause
 	 * do_pg_abort_backup() to fail.
-	 */
-	WALInsertLockAcquireExclusive();
-
-	/*
+	 *
 	 * It is expected that each do_pg_backup_start() call is matched by
 	 * exactly one do_pg_backup_stop() call.
 	 */
-	Assert(XLogCtl->Insert.runningBackups > 0);
-	XLogCtl->Insert.runningBackups--;
-
-	/*
-	 * Clean up session-level lock.
-	 *
-	 * You might think that WALInsertLockRelease() can be called before
-	 * cleaning up session-level lock because session-level lock doesn't need
-	 * to be protected with WAL insertion lock. But since
-	 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
-	 * cleaned up before it.
-	 */
-	sessionBackupState = SESSION_BACKUP_NONE;
-
-	WALInsertLockRelease();
+	XLogBackupResetRunning();
 
 	/*
 	 * If we are taking an online backup from the standby, we confirm that the
@@ -8666,9 +8644,7 @@ do_pg_backup_stop(BackupState *state, bool waitforarchive)
 		 * Check to see if all WAL replayed during online backup contain
 		 * full-page writes.
 		 */
-		SpinLockAcquire(&XLogCtl->info_lck);
-		recptr = XLogCtl->lastFpwDisableRecPtr;
-		SpinLockRelease(&XLogCtl->info_lck);
+		recptr = XLogGetLastFPWDisableRecptr();
 
 		if (state->startpoint <= recptr)
 			ereport(ERROR,
@@ -8681,10 +8657,7 @@ do_pg_backup_stop(BackupState *state, bool waitforarchive)
 							 "and then try an online backup again.")));
 
 
-		LWLockAcquire(ControlFileLock, LW_SHARED);
-		state->stoppoint = ControlFile->minRecoveryPoint;
-		state->stoptli = ControlFile->minRecoveryPointTLI;
-		LWLockRelease(ControlFileLock);
+		XLogGetMinRecoveryPoint(&state->stoppoint, &state->stoptli);
 	}
 	else
 	{
@@ -8702,7 +8675,7 @@ do_pg_backup_stop(BackupState *state, bool waitforarchive)
 		 * Given that we're not in recovery, InsertTimeLineID is set and can't
 		 * change, so we can read it without a lock.
 		 */
-		state->stoptli = XLogCtl->InsertTimeLineID;
+		state->stoptli = GetWALInsertionTimeLine();
 
 		/*
 		 * Force a switch to a new xlog segment file, so that the backup is
@@ -8847,12 +8820,7 @@ do_pg_abort_backup(int code, Datum arg)
 
 	if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
 	{
-		WALInsertLockAcquireExclusive();
-		Assert(XLogCtl->Insert.runningBackups > 0);
-		XLogCtl->Insert.runningBackups--;
-
-		sessionBackupState = SESSION_BACKUP_NONE;
-		WALInsertLockRelease();
+		XLogBackupResetRunning();
 
 		if (!during_backup_start)
 			ereport(WARNING,
@@ -8875,6 +8843,107 @@ register_persistent_abort_backup_handler(void)
 	already_done = true;
 }
 
+/*
+ * Get the checkpoint location.
+ */
+void
+GetCheckpointLocation(XLogRecPtr *loc, XLogRecPtr *redoloc,
+					  TimeLineID *tli, bool *fpw)
+{
+	LWLockAcquire(ControlFileLock, LW_SHARED);
+	*loc = ControlFile->checkPoint;
+	*redoloc = ControlFile->checkPointCopy.redo;
+	*tli = ControlFile->checkPointCopy.ThisTimeLineID;
+	*fpw = ControlFile->checkPointCopy.fullPageWrites;
+	LWLockRelease(ControlFileLock);
+}
+
+/*
+ * Get the minRecoveryPoint and minRecoveryPointTLI.
+ */
+void
+XLogGetMinRecoveryPoint(XLogRecPtr *loc, TimeLineID *tli)
+{
+	LWLockAcquire(ControlFileLock, LW_SHARED);
+	*loc = ControlFile->minRecoveryPoint;
+	*tli = ControlFile->minRecoveryPointTLI;
+	LWLockRelease(ControlFileLock);
+}
+
+/*
+ * Get the lastFpwDisableRecPtr.
+ */
+XLogRecPtr
+XLogGetLastFPWDisableRecptr(void)
+{
+	XLogRecPtr recptr;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	recptr = XLogCtl->lastFpwDisableRecPtr;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return recptr;
+}
+
+/*
+ * Set lastBackupStart only if it is less than the passed-in recptr and return
+ * true. Otherwise return false.
+ *
+ * Note: For those who want to set lastBackupStart unconditionally, pass recptr
+ * value as PG_UINT64_MAX, which is higher than any real XLogRecPtr value.
+ */
+bool
+XLogBackupSetLastStart(XLogRecPtr recptr)
+{
+	bool is_set = false;
+
+	WALInsertLockAcquireExclusive();
+	if (XLogCtl->Insert.lastBackupStart < recptr)
+	{
+		XLogCtl->Insert.lastBackupStart = recptr;
+		is_set = true;
+	}
+	WALInsertLockRelease();
+
+	return is_set;
+}
+
+/*
+ * Increment the runningBackups counter.
+ */
+void
+XLogBackupSetRunning(void)
+{
+	WALInsertLockAcquireExclusive();
+	XLogCtl->Insert.runningBackups++;
+	WALInsertLockRelease();
+}
+
+/*
+ * Decrement runningBackups and reset the session-level backup status.
+ */
+void
+XLogBackupResetRunning(void)
+{
+	WALInsertLockAcquireExclusive();
+
+	Assert(XLogCtl->Insert.runningBackups > 0);
+	XLogCtl->Insert.runningBackups--;
+
+	/*
+	 * Reset session-level lock.
+	 *
+	 * You might think that WALInsertLockRelease() can be called before
+	 * cleaning up session-level lock because session-level lock doesn't need
+	 * to be protected with WAL insertion lock. But since
+	 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
+	 * cleaned up before it.
+	 */
+	reset_backup_status();
+
+	WALInsertLockRelease();
+}
+
 /*
  * Get latest WAL insert pointer
  */
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 1fbd48fbda..b46adca291 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -285,6 +285,7 @@ extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
 extern void do_pg_abort_backup(int code, Datum arg);
 extern void register_persistent_abort_backup_handler(void);
 extern SessionBackupState get_backup_status(void);
+extern void reset_backup_status(void);
 
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_SIGNAL_FILE	"recovery.signal"
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 44291b337b..8b5905357e 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -401,4 +401,12 @@ extern PGDLLIMPORT bool InArchiveRecovery;
 extern PGDLLIMPORT bool StandbyMode;
 extern PGDLLIMPORT char *recoveryRestoreCommand;
 
+extern void GetCheckpointLocation(XLogRecPtr *loc, XLogRecPtr *redoloc,
+								  TimeLineID *tli, bool *fpw);
+extern void XLogGetMinRecoveryPoint(XLogRecPtr *loc, TimeLineID *tli);
+extern XLogRecPtr XLogGetLastFPWDisableRecptr(void);
+extern bool XLogBackupSetLastStart(XLogRecPtr recptr);
+extern void XLogBackupSetRunning(void);
+extern void XLogBackupResetRunning(void);
+
 #endif							/* XLOG_INTERNAL_H */
-- 
2.34.1

From cdf4661428387ae5192a5810090ed4f113ef2274 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 19 Oct 2022 15:06:41 +0000
Subject: [PATCH v8] Move backup-related code from xlog.c to xlogbackup.c

---
 src/backend/access/transam/xlog.c       | 652 +----------------------
 src/backend/access/transam/xlogbackup.c | 663 ++++++++++++++++++++++++
 src/include/access/xlog.h               |  30 --
 src/include/access/xlogbackup.h         |  29 ++
 4 files changed, 694 insertions(+), 680 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8475dfe5c1..1f552812e9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -57,20 +57,20 @@
 #include "access/twophase.h"
 #include "access/xact.h"
 #include "access/xlog_internal.h"
+#include "access/xlogbackup.h"
 #include "access/xlogarchive.h"
 #include "access/xloginsert.h"
 #include "access/xlogprefetcher.h"
 #include "access/xlogreader.h"
 #include "access/xlogrecovery.h"
 #include "access/xlogutils.h"
-#include "backup/basebackup.h"
 #include "catalog/catversion.h"
-#include "catalog/pg_control.h"
 #include "catalog/pg_database.h"
 #include "common/controldata_utils.h"
 #include "common/file_utils.h"
 #include "executor/instrument.h"
 #include "miscadmin.h"
+#include "nodes/pg_list.h"
 #include "pg_trace.h"
 #include "pgstat.h"
 #include "port/atomics.h"
@@ -86,7 +86,6 @@
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
-#include "storage/ipc.h"
 #include "storage/large_object.h"
 #include "storage/latch.h"
 #include "storage/pmsignal.h"
@@ -393,12 +392,6 @@ typedef union WALInsertLockPadded
 	char		pad[PG_CACHE_LINE_SIZE];
 } WALInsertLockPadded;
 
-/*
- * Session status of running backup, used for sanity checks in SQL-callable
- * functions to start and stop backups.
- */
-static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE;
-
 /*
  * Shared state data for WAL insertion.
  */
@@ -668,7 +661,6 @@ static void RemoveXlogFile(const struct dirent *segment_de,
 						   TimeLineID insertTLI);
 static void UpdateLastRemovedPtr(char *filename);
 static void ValidateXLOGDirectoryStructure(void);
-static void CleanupBackupHistory(void);
 static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
 static bool PerformRecoveryXLogAction(void);
 static void InitControlFile(uint64 sysidentifier);
@@ -3817,38 +3809,6 @@ ValidateXLOGDirectoryStructure(void)
 	}
 }
 
-/*
- * Remove previous backup history files.  This also retries creation of
- * .ready files for any backup history files for which XLogArchiveNotify
- * failed earlier.
- */
-static void
-CleanupBackupHistory(void)
-{
-	DIR		   *xldir;
-	struct dirent *xlde;
-	char		path[MAXPGPATH + sizeof(XLOGDIR)];
-
-	xldir = AllocateDir(XLOGDIR);
-
-	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
-	{
-		if (IsBackupHistoryFileName(xlde->d_name))
-		{
-			if (XLogArchiveCheckDone(xlde->d_name))
-			{
-				elog(DEBUG2, "removing WAL backup history file \"%s\"",
-					 xlde->d_name);
-				snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
-				unlink(path);
-				XLogArchiveCleanup(xlde->d_name);
-			}
-		}
-	}
-
-	FreeDir(xldir);
-}
-
 /*
  * I/O routines for pg_control
  *
@@ -8235,614 +8195,6 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	PendingWalStats.wal_sync++;
 }
 
-/*
- * do_pg_backup_start is the workhorse of the user-visible pg_backup_start()
- * function. It creates the necessary starting checkpoint and constructs the
- * backup state and tablespace map.
- *
- * Input parameters are "state" (the backup state), "fast" (if true, we do
- * the checkpoint in immediate mode to make it faster), and "tablespaces"
- * (if non-NULL, indicates a list of tablespaceinfo structs describing the
- * cluster's tablespaces.).
- *
- * The tablespace map contents are appended to passed-in parameter
- * tablespace_map and the caller is responsible for including it in the backup
- * archive as 'tablespace_map'. The tablespace_map file is required mainly for
- * tar format in windows as native windows utilities are not able to create
- * symlinks while extracting files from tar. However for consistency and
- * platform-independence, we do it the same way everywhere.
- *
- * It fills in "state" with the information required for the backup, such
- * as the minimum WAL location that must be present to restore from this
- * backup (starttli) and the corresponding timeline ID (starttli).
- *
- * Every successfully started backup must be stopped by calling
- * do_pg_backup_stop() or do_pg_abort_backup(). There can be many
- * backups active at the same time.
- *
- * It is the responsibility of the caller of this function to verify the
- * permissions of the calling user!
- */
-void
-do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
-				   BackupState *state, StringInfo tblspcmapfile)
-{
-	bool		backup_started_in_recovery;
-
-	Assert(state != NULL);
-	backup_started_in_recovery = RecoveryInProgress();
-
-	/*
-	 * During recovery, we don't need to check WAL level. Because, if WAL
-	 * level is not sufficient, it's impossible to get here during recovery.
-	 */
-	if (!backup_started_in_recovery && !XLogIsNeeded())
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("WAL level not sufficient for making an online backup"),
-				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
-
-	if (strlen(backupidstr) > MAXPGPATH)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("backup label too long (max %d bytes)",
-						MAXPGPATH)));
-
-	memcpy(state->name, backupidstr, strlen(backupidstr));
-
-	/*
-	 * Mark backup active in shared memory.  We must do full-page WAL writes
-	 * during an on-line backup even if not doing so at other times, because
-	 * it's quite possible for the backup dump to obtain a "torn" (partially
-	 * written) copy of a database page if it reads the page concurrently with
-	 * our write to the same page.  This can be fixed as long as the first
-	 * write to the page in the WAL sequence is a full-page write. Hence, we
-	 * increment runningBackups then force a CHECKPOINT, to ensure there are
-	 * no dirty pages in shared memory that might get dumped while the backup
-	 * is in progress without having a corresponding WAL record.  (Once the
-	 * backup is complete, we need not force full-page writes anymore, since
-	 * we expect that any pages not modified during the backup interval must
-	 * have been correctly captured by the backup.)
-	 *
-	 * Note that forcing full-page writes has no effect during an online
-	 * backup from the standby.
-	 *
-	 * We must hold all the insertion locks to change the value of
-	 * runningBackups, to ensure adequate interlocking against
-	 * XLogInsertRecord().
-	 */
-	XLogBackupSetRunning();
-
-	/*
-	 * Ensure we decrement runningBackups if we fail below. NB -- for this to
-	 * work correctly, it is critical that sessionBackupState is only updated
-	 * after this block is over.
-	 */
-	PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, DatumGetBool(true));
-	{
-		bool		gotUniqueStartpoint = false;
-		DIR		   *tblspcdir;
-		struct dirent *de;
-		tablespaceinfo *ti;
-		int			datadirpathlen;
-
-		/*
-		 * Force an XLOG file switch before the checkpoint, to ensure that the
-		 * WAL segment the checkpoint is written to doesn't contain pages with
-		 * old timeline IDs.  That would otherwise happen if you called
-		 * pg_backup_start() right after restoring from a PITR archive: the
-		 * first WAL segment containing the startup checkpoint has pages in
-		 * the beginning with the old timeline ID.  That can cause trouble at
-		 * recovery: we won't have a history file covering the old timeline if
-		 * pg_wal directory was not included in the base backup and the WAL
-		 * archive was cleared too before starting the backup.
-		 *
-		 * This also ensures that we have emitted a WAL page header that has
-		 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
-		 * Therefore, if a WAL archiver (such as pglesslog) is trying to
-		 * compress out removable backup blocks, it won't remove any that
-		 * occur after this point.
-		 *
-		 * During recovery, we skip forcing XLOG file switch, which means that
-		 * the backup taken during recovery is not available for the special
-		 * recovery case described above.
-		 */
-		if (!backup_started_in_recovery)
-			RequestXLogSwitch(false);
-
-		do
-		{
-			bool		checkpointfpw;
-
-			/*
-			 * Force a CHECKPOINT.  Aside from being necessary to prevent torn
-			 * page problems, this guarantees that two successive backup runs
-			 * will have different checkpoint positions and hence different
-			 * history file names, even if nothing happened in between.
-			 *
-			 * During recovery, establish a restartpoint if possible. We use
-			 * the last restartpoint as the backup starting checkpoint. This
-			 * means that two successive backup runs can have same checkpoint
-			 * positions.
-			 *
-			 * Since the fact that we are executing do_pg_backup_start()
-			 * during recovery means that checkpointer is running, we can use
-			 * RequestCheckpoint() to establish a restartpoint.
-			 *
-			 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
-			 * passing fast = true).  Otherwise this can take awhile.
-			 */
-			RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
-							  (fast ? CHECKPOINT_IMMEDIATE : 0));
-
-			/*
-			 * Now we need to fetch the checkpoint record location, and also
-			 * its REDO pointer.  The oldest point in WAL that would be needed
-			 * to restore starting from the checkpoint is precisely the REDO
-			 * pointer.
-			 */
-			GetCheckpointLocation(&state->checkpointloc, &state->startpoint,
-								  &state->starttli, &checkpointfpw);
-
-			if (backup_started_in_recovery)
-			{
-				XLogRecPtr	recptr;
-
-				/*
-				 * Check to see if all WAL replayed during online backup
-				 * (i.e., since last restartpoint used as backup starting
-				 * checkpoint) contain full-page writes.
-				 */
-				recptr = XLogGetLastFPWDisableRecptr();
-
-				if (!checkpointfpw || state->startpoint <= recptr)
-					ereport(ERROR,
-							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-							 errmsg("WAL generated with full_page_writes=off was replayed "
-									"since last restartpoint"),
-							 errhint("This means that the backup being taken on the standby "
-									 "is corrupt and should not be used. "
-									 "Enable full_page_writes and run CHECKPOINT on the primary, "
-									 "and then try an online backup again.")));
-
-				/*
-				 * During recovery, since we don't use the end-of-backup WAL
-				 * record and don't write the backup history file, the
-				 * starting WAL location doesn't need to be unique. This means
-				 * that two base backups started at the same time might use
-				 * the same checkpoint as starting locations.
-				 */
-				gotUniqueStartpoint = true;
-			}
-
-			/*
-			 * If two base backups are started at the same time (in WAL sender
-			 * processes), we need to make sure that they use different
-			 * checkpoints as starting locations, because we use the starting
-			 * WAL location as a unique identifier for the base backup in the
-			 * end-of-backup WAL record and when we write the backup history
-			 * file. Perhaps it would be better generate a separate unique ID
-			 * for each backup instead of forcing another checkpoint, but
-			 * taking a checkpoint right after another is not that expensive
-			 * either because only few buffers have been dirtied yet.
-			 */
-			gotUniqueStartpoint = XLogBackupSetLastStart(state->startpoint);
-		} while (!gotUniqueStartpoint);
-
-		/*
-		 * Construct tablespace_map file.
-		 */
-		datadirpathlen = strlen(DataDir);
-
-		/* Collect information about all tablespaces */
-		tblspcdir = AllocateDir("pg_tblspc");
-		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
-		{
-			char		fullpath[MAXPGPATH + 10];
-			char		linkpath[MAXPGPATH];
-			char	   *relpath = NULL;
-			int			rllen;
-			StringInfoData escapedpath;
-			char	   *s;
-
-			/* Skip anything that doesn't look like a tablespace */
-			if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
-				continue;
-
-			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-			/*
-			 * Skip anything that isn't a symlink/junction.  For testing only,
-			 * we sometimes use allow_in_place_tablespaces to create
-			 * directories directly under pg_tblspc, which would fail below.
-			 */
-			if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK)
-				continue;
-
-			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
-			if (rllen < 0)
-			{
-				ereport(WARNING,
-						(errmsg("could not read symbolic link \"%s\": %m",
-								fullpath)));
-				continue;
-			}
-			else if (rllen >= sizeof(linkpath))
-			{
-				ereport(WARNING,
-						(errmsg("symbolic link \"%s\" target is too long",
-								fullpath)));
-				continue;
-			}
-			linkpath[rllen] = '\0';
-
-			/*
-			 * Build a backslash-escaped version of the link path to include
-			 * in the tablespace map file.
-			 */
-			initStringInfo(&escapedpath);
-			for (s = linkpath; *s; s++)
-			{
-				if (*s == '\n' || *s == '\r' || *s == '\\')
-					appendStringInfoChar(&escapedpath, '\\');
-				appendStringInfoChar(&escapedpath, *s);
-			}
-
-			/*
-			 * Relpath holds the relative path of the tablespace directory
-			 * when it's located within PGDATA, or NULL if it's located
-			 * elsewhere.
-			 */
-			if (rllen > datadirpathlen &&
-				strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
-				IS_DIR_SEP(linkpath[datadirpathlen]))
-				relpath = linkpath + datadirpathlen + 1;
-
-			ti = palloc(sizeof(tablespaceinfo));
-			ti->oid = pstrdup(de->d_name);
-			ti->path = pstrdup(linkpath);
-			ti->rpath = relpath ? pstrdup(relpath) : NULL;
-			ti->size = -1;
-
-			if (tablespaces)
-				*tablespaces = lappend(*tablespaces, ti);
-
-			appendStringInfo(tblspcmapfile, "%s %s\n",
-							 ti->oid, escapedpath.data);
-
-			pfree(escapedpath.data);
-		}
-		FreeDir(tblspcdir);
-
-		state->starttime = (pg_time_t) time(NULL);
-	}
-	PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, DatumGetBool(true));
-
-	state->started_in_recovery = backup_started_in_recovery;
-
-	/*
-	 * Mark that the start phase has correctly finished for the backup.
-	 */
-	sessionBackupState = SESSION_BACKUP_RUNNING;
-}
-
-/*
- * Utility routine to fetch the session-level status of a backup running.
- */
-SessionBackupState
-get_backup_status(void)
-{
-	return sessionBackupState;
-}
-
-/*
- * Utility routine to reset the session-level status of a backup running.
- */
-void
-reset_backup_status(void)
-{
-	sessionBackupState = SESSION_BACKUP_NONE;
-}
-
-/*
- * do_pg_backup_stop
- *
- * Utility function called at the end of an online backup.  It creates history
- * file (if required), resets sessionBackupState and so on.  It can optionally
- * wait for WAL segments to be archived.
- *
- * "state" is filled with the information necessary to restore from this
- * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc.
- *
- * It is the responsibility of the caller of this function to verify the
- * permissions of the calling user!
- */
-void
-do_pg_backup_stop(BackupState *state, bool waitforarchive)
-{
-	bool		backup_stopped_in_recovery = false;
-	char		histfilepath[MAXPGPATH];
-	char		lastxlogfilename[MAXFNAMELEN];
-	char		histfilename[MAXFNAMELEN];
-	XLogSegNo	_logSegNo;
-	FILE	   *fp;
-	int			seconds_before_warning;
-	int			waits = 0;
-	bool		reported_waiting = false;
-
-	Assert(state != NULL);
-
-	backup_stopped_in_recovery = RecoveryInProgress();
-
-	/*
-	 * During recovery, we don't need to check WAL level. Because, if WAL
-	 * level is not sufficient, it's impossible to get here during recovery.
-	 */
-	if (!backup_stopped_in_recovery && !XLogIsNeeded())
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("WAL level not sufficient for making an online backup"),
-				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
-
-	/*
-	 * OK to reset backup counter and session-level lock.
-	 *
-	 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
-	 * otherwise they can be updated inconsistently, which might cause
-	 * do_pg_abort_backup() to fail.
-	 *
-	 * It is expected that each do_pg_backup_start() call is matched by
-	 * exactly one do_pg_backup_stop() call.
-	 */
-	XLogBackupResetRunning();
-
-	/*
-	 * If we are taking an online backup from the standby, we confirm that the
-	 * standby has not been promoted during the backup.
-	 */
-	if (state->started_in_recovery && !backup_stopped_in_recovery)
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("the standby was promoted during online backup"),
-				 errhint("This means that the backup being taken is corrupt "
-						 "and should not be used. "
-						 "Try taking another online backup.")));
-
-	/*
-	 * During recovery, we don't write an end-of-backup record. We assume that
-	 * pg_control was backed up last and its minimum recovery point can be
-	 * available as the backup end location. Since we don't have an
-	 * end-of-backup record, we use the pg_control value to check whether
-	 * we've reached the end of backup when starting recovery from this
-	 * backup. We have no way of checking if pg_control wasn't backed up last
-	 * however.
-	 *
-	 * We don't force a switch to new WAL file but it is still possible to
-	 * wait for all the required files to be archived if waitforarchive is
-	 * true. This is okay if we use the backup to start a standby and fetch
-	 * the missing WAL using streaming replication. But in the case of an
-	 * archive recovery, a user should set waitforarchive to true and wait for
-	 * them to be archived to ensure that all the required files are
-	 * available.
-	 *
-	 * We return the current minimum recovery point as the backup end
-	 * location. Note that it can be greater than the exact backup end
-	 * location if the minimum recovery point is updated after the backup of
-	 * pg_control. This is harmless for current uses.
-	 *
-	 * XXX currently a backup history file is for informational and debug
-	 * purposes only. It's not essential for an online backup. Furthermore,
-	 * even if it's created, it will not be archived during recovery because
-	 * an archiver is not invoked. So it doesn't seem worthwhile to write a
-	 * backup history file during recovery.
-	 */
-	if (backup_stopped_in_recovery)
-	{
-		XLogRecPtr	recptr;
-
-		/*
-		 * Check to see if all WAL replayed during online backup contain
-		 * full-page writes.
-		 */
-		recptr = XLogGetLastFPWDisableRecptr();
-
-		if (state->startpoint <= recptr)
-			ereport(ERROR,
-					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					 errmsg("WAL generated with full_page_writes=off was replayed "
-							"during online backup"),
-					 errhint("This means that the backup being taken on the standby "
-							 "is corrupt and should not be used. "
-							 "Enable full_page_writes and run CHECKPOINT on the primary, "
-							 "and then try an online backup again.")));
-
-
-		XLogGetMinRecoveryPoint(&state->stoppoint, &state->stoptli);
-	}
-	else
-	{
-		char	   *history_file;
-
-		/*
-		 * Write the backup-end xlog record
-		 */
-		XLogBeginInsert();
-		XLogRegisterData((char *) (&state->startpoint),
-						 sizeof(state->startpoint));
-		state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
-
-		/*
-		 * Given that we're not in recovery, InsertTimeLineID is set and can't
-		 * change, so we can read it without a lock.
-		 */
-		state->stoptli = GetWALInsertionTimeLine();
-
-		/*
-		 * Force a switch to a new xlog segment file, so that the backup is
-		 * valid as soon as archiver moves out the current segment file.
-		 */
-		RequestXLogSwitch(false);
-
-		state->stoptime = (pg_time_t) time(NULL);
-
-		/*
-		 * Write the backup history file
-		 */
-		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
-		BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
-							  state->startpoint, wal_segment_size);
-		fp = AllocateFile(histfilepath, "w");
-		if (!fp)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not create file \"%s\": %m",
-							histfilepath)));
-
-		/* Build and save the contents of the backup history file */
-		history_file = build_backup_content(state, true);
-		fprintf(fp, "%s", history_file);
-		pfree(history_file);
-
-		if (fflush(fp) || ferror(fp) || FreeFile(fp))
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not write file \"%s\": %m",
-							histfilepath)));
-
-		/*
-		 * Clean out any no-longer-needed history files.  As a side effect,
-		 * this will post a .ready file for the newly created history file,
-		 * notifying the archiver that history file may be archived
-		 * immediately.
-		 */
-		CleanupBackupHistory();
-	}
-
-	/*
-	 * If archiving is enabled, wait for all the required WAL files to be
-	 * archived before returning. If archiving isn't enabled, the required WAL
-	 * needs to be transported via streaming replication (hopefully with
-	 * wal_keep_size set high enough), or some more exotic mechanism like
-	 * polling and copying files from pg_wal with script. We have no knowledge
-	 * of those mechanisms, so it's up to the user to ensure that he gets all
-	 * the required WAL.
-	 *
-	 * We wait until both the last WAL file filled during backup and the
-	 * history file have been archived, and assume that the alphabetic sorting
-	 * property of the WAL files ensures any earlier WAL files are safely
-	 * archived as well.
-	 *
-	 * We wait forever, since archive_command is supposed to work and we
-	 * assume the admin wanted his backup to work completely. If you don't
-	 * wish to wait, then either waitforarchive should be passed in as false,
-	 * or you can set statement_timeout.  Also, some notices are issued to
-	 * clue in anyone who might be doing this interactively.
-	 */
-
-	if (waitforarchive &&
-		((!backup_stopped_in_recovery && XLogArchivingActive()) ||
-		 (backup_stopped_in_recovery && XLogArchivingAlways())))
-	{
-		XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
-		XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
-					 wal_segment_size);
-
-		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
-		BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
-							  state->startpoint, wal_segment_size);
-
-		seconds_before_warning = 60;
-		waits = 0;
-
-		while (XLogArchiveIsBusy(lastxlogfilename) ||
-			   XLogArchiveIsBusy(histfilename))
-		{
-			CHECK_FOR_INTERRUPTS();
-
-			if (!reported_waiting && waits > 5)
-			{
-				ereport(NOTICE,
-						(errmsg("base backup done, waiting for required WAL segments to be archived")));
-				reported_waiting = true;
-			}
-
-			(void) WaitLatch(MyLatch,
-							 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
-							 1000L,
-							 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
-			ResetLatch(MyLatch);
-
-			if (++waits >= seconds_before_warning)
-			{
-				seconds_before_warning *= 2;	/* This wraps in >10 years... */
-				ereport(WARNING,
-						(errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
-								waits),
-						 errhint("Check that your archive_command is executing properly.  "
-								 "You can safely cancel this backup, "
-								 "but the database backup will not be usable without all the WAL segments.")));
-			}
-		}
-
-		ereport(NOTICE,
-				(errmsg("all required WAL segments have been archived")));
-	}
-	else if (waitforarchive)
-		ereport(NOTICE,
-				(errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
-}
-
-
-/*
- * do_pg_abort_backup: abort a running backup
- *
- * This does just the most basic steps of do_pg_backup_stop(), by taking the
- * system out of backup mode, thus making it a lot more safe to call from
- * an error handler.
- *
- * 'arg' indicates that it's being called during backup setup; so
- * sessionBackupState has not been modified yet, but runningBackups has
- * already been incremented.  When it's false, then it's invoked as a
- * before_shmem_exit handler, and therefore we must not change state
- * unless sessionBackupState indicates that a backup is actually running.
- *
- * NB: This gets used as a PG_ENSURE_ERROR_CLEANUP callback and
- * before_shmem_exit handler, hence the odd-looking signature.
- */
-void
-do_pg_abort_backup(int code, Datum arg)
-{
-	bool		during_backup_start = DatumGetBool(arg);
-
-	/* Only one of these conditions can be true */
-	Assert(during_backup_start ^
-		   (sessionBackupState == SESSION_BACKUP_RUNNING));
-
-	if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
-	{
-		XLogBackupResetRunning();
-
-		if (!during_backup_start)
-			ereport(WARNING,
-					errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
-	}
-}
-
-/*
- * Register a handler that will warn about unterminated backups at end of
- * session, unless this has already been done.
- */
-void
-register_persistent_abort_backup_handler(void)
-{
-	static bool already_done = false;
-
-	if (already_done)
-		return;
-	before_shmem_exit(do_pg_abort_backup, DatumGetBool(false));
-	already_done = true;
-}
-
 /*
  * Get the checkpoint location.
  */
diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c
index 90b5273b02..7947a8e0b6 100644
--- a/src/backend/access/transam/xlogbackup.c
+++ b/src/backend/access/transam/xlogbackup.c
@@ -13,9 +13,32 @@
 
 #include "postgres.h"
 
+#include <time.h>
+#include <unistd.h>
+
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
+#include "access/xlogarchive.h"
 #include "access/xlogbackup.h"
+#include "access/xloginsert.h"
+#include "backup/basebackup.h"
+#include "catalog/pg_control.h"
+#include "common/file_utils.h"
+#include "miscadmin.h"
+#include "postmaster/bgwriter.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "utils/wait_event.h"
+
+/*
+ * Session status of running backup, used for sanity checks in functions to
+ * start, stop and abort backups.
+ */
+static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE;
+
+static void CleanupBackupHistory(void);
 
 /*
  * Build contents for backup_label or backup history file.
@@ -82,3 +105,643 @@ build_backup_content(BackupState *state, bool ishistoryfile)
 
 	return data;
 }
+
+/*
+ * Remove backup history files for which XLogArchiveCheckDone() returns true.
+ * This also retries creation of .ready files for any backup history files
+ * for which XLogArchiveNotify failed earlier.
+ */
+static void
+CleanupBackupHistory(void)
+{
+	DIR		   *xldir;
+	struct dirent *xlde;
+	char		path[MAXPGPATH + sizeof(XLOGDIR)];
+
+	xldir = AllocateDir(XLOGDIR);
+
+	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
+	{
+		if (IsBackupHistoryFileName(xlde->d_name))
+		{
+			if (XLogArchiveCheckDone(xlde->d_name))
+			{
+				elog(DEBUG2, "removing WAL backup history file \"%s\"",
+					 xlde->d_name);
+				snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
+				unlink(path);	/* result is not checked */
+				XLogArchiveCleanup(xlde->d_name);
+			}
+		}
+	}
+
+	FreeDir(xldir);
+}
+
+/*
+ * do_pg_backup_start is the workhorse of the user-visible pg_backup_start()
+ * function. It creates the necessary starting checkpoint and constructs the
+ * backup state and tablespace map.
+ *
+ * Input parameters are "state" (the backup state), "fast" (if true, we do
+ * the checkpoint in immediate mode to make it faster), and "tablespaces"
+ * (if non-NULL, indicates a list of tablespaceinfo structs describing the
+ * cluster's tablespaces.).
+ *
+ * The tablespace map contents are appended to passed-in parameter
+ * tblspcmapfile and the caller is responsible for including it in the backup
+ * archive as 'tablespace_map'. The tablespace_map file is required mainly for
+ * tar format in windows as native windows utilities are not able to create
+ * symlinks while extracting files from tar. However for consistency and
+ * platform-independence, we do it the same way everywhere.
+ *
+ * It fills in "state" with the information required for the backup, such
+ * as the minimum WAL location that must be present to restore from this
+ * backup (startpoint) and the corresponding timeline ID (starttli).
+ *
+ * Every successfully started backup must be stopped by calling
+ * do_pg_backup_stop() or do_pg_abort_backup(). There can be many
+ * backups active at the same time.
+ *
+ * It is the responsibility of the caller of this function to verify the
+ * permissions of the calling user!
+ */
+void
+do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
+				   BackupState *state, StringInfo tblspcmapfile)
+{
+	bool		backup_started_in_recovery;
+
+	Assert(state != NULL);
+	backup_started_in_recovery = RecoveryInProgress();
+
+	/*
+	 * During recovery, we don't need to check WAL level. Because, if WAL
+	 * level is not sufficient, it's impossible to get here during recovery.
+	 */
+	if (!backup_started_in_recovery && !XLogIsNeeded())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("WAL level not sufficient for making an online backup"),
+				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
+
+	if (strlen(backupidstr) > MAXPGPATH)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("backup label too long (max %d bytes)",
+						MAXPGPATH)));
+
+	memcpy(state->name, backupidstr, strlen(backupidstr));
+
+	/*
+	 * Mark backup active in shared memory.  We must do full-page WAL writes
+	 * during an on-line backup even if not doing so at other times, because
+	 * it's quite possible for the backup dump to obtain a "torn" (partially
+	 * written) copy of a database page if it reads the page concurrently with
+	 * our write to the same page.  This can be fixed as long as the first
+	 * write to the page in the WAL sequence is a full-page write. Hence, we
+	 * increment runningBackups then force a CHECKPOINT, to ensure there are
+	 * no dirty pages in shared memory that might get dumped while the backup
+	 * is in progress without having a corresponding WAL record.  (Once the
+	 * backup is complete, we need not force full-page writes anymore, since
+	 * we expect that any pages not modified during the backup interval must
+	 * have been correctly captured by the backup.)
+	 *
+	 * Note that forcing full-page writes has no effect during an online
+	 * backup from the standby.
+	 *
+	 * We must hold all the insertion locks to change the value of
+	 * runningBackups, to ensure adequate interlocking against
+	 * XLogInsertRecord().
+	 */
+	XLogBackupSetRunning();
+
+	/*
+	 * Ensure we decrement runningBackups if we fail below. NB -- for this to
+	 * work correctly, it is critical that sessionBackupState is only updated
+	 * after this block is over.
+	 */
+	PG_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(true));
+	{
+		bool		gotUniqueStartpoint = false;
+		DIR		   *tblspcdir;
+		struct dirent *de;
+		tablespaceinfo *ti;
+		int			datadirpathlen;
+
+		/*
+		 * Force an XLOG file switch before the checkpoint, to ensure that the
+		 * WAL segment the checkpoint is written to doesn't contain pages with
+		 * old timeline IDs.  That would otherwise happen if you called
+		 * pg_backup_start() right after restoring from a PITR archive: the
+		 * first WAL segment containing the startup checkpoint has pages in
+		 * the beginning with the old timeline ID.  That can cause trouble at
+		 * recovery: we won't have a history file covering the old timeline if
+		 * pg_wal directory was not included in the base backup and the WAL
+		 * archive was cleared too before starting the backup.
+		 *
+		 * This also ensures that we have emitted a WAL page header that has
+		 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
+		 * Therefore, if a WAL archiver (such as pglesslog) is trying to
+		 * compress out removable backup blocks, it won't remove any that
+		 * occur after this point.
+		 *
+		 * During recovery, we skip forcing XLOG file switch, which means that
+		 * the backup taken during recovery is not available for the special
+		 * recovery case described above.
+		 */
+		if (!backup_started_in_recovery)
+			RequestXLogSwitch(false);
+
+		do
+		{
+			bool		checkpointfpw;
+
+			/*
+			 * Force a CHECKPOINT.  Aside from being necessary to prevent torn
+			 * page problems, this guarantees that two successive backup runs
+			 * will have different checkpoint positions and hence different
+			 * history file names, even if nothing happened in between.
+			 *
+			 * During recovery, establish a restartpoint if possible. We use
+			 * the last restartpoint as the backup starting checkpoint. This
+			 * means that two successive backup runs can have same checkpoint
+			 * positions.
+			 *
+			 * Since the fact that we are executing do_pg_backup_start()
+			 * during recovery means that checkpointer is running, we can use
+			 * RequestCheckpoint() to establish a restartpoint.
+			 *
+			 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
+			 * passing fast = true).  Otherwise this can take awhile.
+			 */
+			RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
+							  (fast ? CHECKPOINT_IMMEDIATE : 0));
+
+			/*
+			 * Now we need to fetch the checkpoint record location, and also
+			 * its REDO pointer.  The oldest point in WAL that would be needed
+			 * to restore starting from the checkpoint is precisely the REDO
+			 * pointer.
+			 */
+			GetCheckpointLocation(&state->checkpointloc, &state->startpoint,
+								  &state->starttli, &checkpointfpw);
+
+			if (backup_started_in_recovery)
+			{
+				XLogRecPtr	recptr;
+
+				/*
+				 * Check to see if all WAL replayed during online backup
+				 * (i.e., since last restartpoint used as backup starting
+				 * checkpoint) contain full-page writes.
+				 */
+				recptr = XLogGetLastFPWDisableRecptr();
+
+				if (!checkpointfpw || state->startpoint <= recptr)
+					ereport(ERROR,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("WAL generated with full_page_writes=off was replayed "
+									"since last restartpoint"),
+							 errhint("This means that the backup being taken on the standby "
+									 "is corrupt and should not be used. "
+									 "Enable full_page_writes and run CHECKPOINT on the primary, "
+									 "and then try an online backup again.")));
+
+				/*
+				 * During recovery, since we don't use the end-of-backup WAL
+				 * record and don't write the backup history file, the
+				 * starting WAL location doesn't need to be unique. This means
+				 * that two base backups started at the same time might use
+				 * the same checkpoint as starting locations.
+				 */
+				gotUniqueStartpoint = true;
+			}
+
+			/*
+			 * If two base backups are started at the same time (in WAL sender
+			 * processes), we need to make sure that they use different
+			 * checkpoints as starting locations, because we use the starting
+			 * WAL location as a unique identifier for the base backup in the
+			 * end-of-backup WAL record and when we write the backup history
+			 * file. Perhaps it would be better to generate a separate unique
+			 * ID for each backup instead of forcing another checkpoint, but a
+			 * back-to-back checkpoint is cheap because few buffers are dirty.
+			 * Use |= so that the recovery-case "true" set above is preserved.
+			 */
+			gotUniqueStartpoint |= XLogBackupSetLastStart(state->startpoint);
+		} while (!gotUniqueStartpoint);
+
+		/*
+		 * Construct tablespace_map file.
+		 */
+		datadirpathlen = strlen(DataDir);
+
+		/* Collect information about all tablespaces */
+		tblspcdir = AllocateDir("pg_tblspc");
+		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+		{
+			char		fullpath[MAXPGPATH + 10];
+			char		linkpath[MAXPGPATH];
+			char	   *relpath = NULL;
+			int			rllen;
+			StringInfoData escapedpath;
+			char	   *s;
+
+			/* Skip anything that doesn't look like a tablespace */
+			if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
+				continue;
+
+			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+			/*
+			 * Skip anything that isn't a symlink/junction.  For testing only,
+			 * we sometimes use allow_in_place_tablespaces to create
+			 * directories directly under pg_tblspc, which would fail below.
+			 */
+			if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK)
+				continue;
+
+			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+			if (rllen < 0)
+			{
+				ereport(WARNING,
+						(errmsg("could not read symbolic link \"%s\": %m",
+								fullpath)));
+				continue;
+			}
+			else if (rllen >= sizeof(linkpath))
+			{
+				ereport(WARNING,
+						(errmsg("symbolic link \"%s\" target is too long",
+								fullpath)));
+				continue;
+			}
+			linkpath[rllen] = '\0';
+
+			/*
+			 * Build a backslash-escaped version of the link path to include
+			 * in the tablespace map file.
+			 */
+			initStringInfo(&escapedpath);
+			for (s = linkpath; *s; s++)
+			{
+				if (*s == '\n' || *s == '\r' || *s == '\\')
+					appendStringInfoChar(&escapedpath, '\\');
+				appendStringInfoChar(&escapedpath, *s);
+			}
+
+			/*
+			 * Relpath holds the relative path of the tablespace directory
+			 * when it's located within PGDATA, or NULL if it's located
+			 * elsewhere.
+			 */
+			if (rllen > datadirpathlen &&
+				strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+				IS_DIR_SEP(linkpath[datadirpathlen]))
+				relpath = linkpath + datadirpathlen + 1;
+
+			ti = palloc(sizeof(tablespaceinfo));
+			ti->oid = pstrdup(de->d_name);
+			ti->path = pstrdup(linkpath);
+			ti->rpath = relpath ? pstrdup(relpath) : NULL;
+			ti->size = -1;
+
+			if (tablespaces)
+				*tablespaces = lappend(*tablespaces, ti);
+
+			appendStringInfo(tblspcmapfile, "%s %s\n",
+							 ti->oid, escapedpath.data);
+
+			pfree(escapedpath.data);
+		}
+		FreeDir(tblspcdir);
+
+		state->starttime = (pg_time_t) time(NULL);
+	}
+	PG_END_ENSURE_ERROR_CLEANUP(do_pg_abort_backup, BoolGetDatum(true));
+
+	state->started_in_recovery = backup_started_in_recovery;
+
+	/*
+	 * Mark that the start phase has correctly finished for the backup.
+	 */
+	sessionBackupState = SESSION_BACKUP_RUNNING;
+}
+
+/*
+ * Return this session's backup status, i.e. the value of sessionBackupState.
+ */
+SessionBackupState
+get_backup_status(void)
+{
+	return sessionBackupState;
+}
+
+/*
+ * Reset this session's backup status back to SESSION_BACKUP_NONE.
+ */
+void
+reset_backup_status(void)
+{
+	sessionBackupState = SESSION_BACKUP_NONE;
+}
+
+/*
+ * do_pg_backup_stop
+ *
+ * Utility function called at the end of an online backup.  It creates history
+ * file (if required), resets sessionBackupState and so on.  It can optionally
+ * wait for WAL segments to be archived.
+ *
+ * "state" is filled with the information necessary to restore from this
+ * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc.
+ *
+ * It is the responsibility of the caller of this function to verify the
+ * permissions of the calling user!
+ */
+void
+do_pg_backup_stop(BackupState *state, bool waitforarchive)
+{
+	bool		backup_stopped_in_recovery = false;
+	char		histfilepath[MAXPGPATH];
+	char		lastxlogfilename[MAXFNAMELEN];
+	char		histfilename[MAXFNAMELEN];
+	XLogSegNo	_logSegNo;
+	FILE	   *fp;
+	int			seconds_before_warning;
+	int			waits = 0;
+	bool		reported_waiting = false;
+
+	Assert(state != NULL);
+
+	backup_stopped_in_recovery = RecoveryInProgress();
+
+	/*
+	 * During recovery, we don't need to check WAL level. Because, if WAL
+	 * level is not sufficient, it's impossible to get here during recovery.
+	 */
+	if (!backup_stopped_in_recovery && !XLogIsNeeded())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("WAL level not sufficient for making an online backup"),
+				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
+
+	/*
+	 * OK to reset backup counter and session-level lock.
+	 *
+	 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
+	 * otherwise they can be updated inconsistently, which might cause
+	 * do_pg_abort_backup() to fail.
+	 *
+	 * It is expected that each do_pg_backup_start() call is matched by
+	 * exactly one do_pg_backup_stop() call.
+	 */
+	XLogBackupResetRunning();
+
+	/*
+	 * If we are taking an online backup from the standby, we confirm that the
+	 * standby has not been promoted during the backup.
+	 */
+	if (state->started_in_recovery && !backup_stopped_in_recovery)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("the standby was promoted during online backup"),
+				 errhint("This means that the backup being taken is corrupt "
+						 "and should not be used. "
+						 "Try taking another online backup.")));
+
+	/*
+	 * During recovery, we don't write an end-of-backup record. We assume that
+	 * pg_control was backed up last and its minimum recovery point can be
+	 * available as the backup end location. Since we don't have an
+	 * end-of-backup record, we use the pg_control value to check whether
+	 * we've reached the end of backup when starting recovery from this
+	 * backup. We have no way of checking if pg_control wasn't backed up last
+	 * however.
+	 *
+	 * We don't force a switch to new WAL file but it is still possible to
+	 * wait for all the required files to be archived if waitforarchive is
+	 * true. This is okay if we use the backup to start a standby and fetch
+	 * the missing WAL using streaming replication. But in the case of an
+	 * archive recovery, a user should set waitforarchive to true and wait for
+	 * them to be archived to ensure that all the required files are
+	 * available.
+	 *
+	 * We return the current minimum recovery point as the backup end
+	 * location. Note that it can be greater than the exact backup end
+	 * location if the minimum recovery point is updated after the backup of
+	 * pg_control. This is harmless for current uses.
+	 *
+	 * XXX currently a backup history file is for informational and debug
+	 * purposes only. It's not essential for an online backup. Furthermore,
+	 * even if it's created, it will not be archived during recovery because
+	 * an archiver is not invoked. So it doesn't seem worthwhile to write a
+	 * backup history file during recovery.
+	 */
+	if (backup_stopped_in_recovery)
+	{
+		XLogRecPtr	recptr;
+
+		/*
+		 * Check to see if all WAL replayed during online backup contain
+		 * full-page writes.
+		 */
+		recptr = XLogGetLastFPWDisableRecptr();
+
+		if (state->startpoint <= recptr)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("WAL generated with full_page_writes=off was replayed "
+							"during online backup"),
+					 errhint("This means that the backup being taken on the standby "
+							 "is corrupt and should not be used. "
+							 "Enable full_page_writes and run CHECKPOINT on the primary, "
+							 "and then try an online backup again.")));
+
+
+		XLogGetMinRecoveryPoint(&state->stoppoint, &state->stoptli);
+	}
+	else
+	{
+		char	   *history_file;
+
+		/*
+		 * Write the backup-end xlog record
+		 */
+		XLogBeginInsert();
+		XLogRegisterData((char *) (&state->startpoint),
+						 sizeof(state->startpoint));
+		state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
+
+		/*
+		 * Given that we're not in recovery, InsertTimeLineID is set and can't
+		 * change, so we can read it without a lock.
+		 */
+		state->stoptli = GetWALInsertionTimeLine();
+
+		/*
+		 * Force a switch to a new xlog segment file, so that the backup is
+		 * valid as soon as archiver moves out the current segment file.
+		 */
+		RequestXLogSwitch(false);
+
+		state->stoptime = (pg_time_t) time(NULL);
+
+		/*
+		 * Write the backup history file
+		 */
+		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
+		BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
+							  state->startpoint, wal_segment_size);
+		fp = AllocateFile(histfilepath, "w");
+		if (!fp)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create file \"%s\": %m",
+							histfilepath)));
+
+		/* Build and save the contents of the backup history file */
+		history_file = build_backup_content(state, true);
+		fprintf(fp, "%s", history_file);
+		pfree(history_file);
+
+		if (fflush(fp) || ferror(fp) || FreeFile(fp))
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not write file \"%s\": %m",
+							histfilepath)));
+
+		/*
+		 * Clean out any no-longer-needed history files.  As a side effect,
+		 * this will post a .ready file for the newly created history file,
+		 * notifying the archiver that history file may be archived
+		 * immediately.
+		 */
+		CleanupBackupHistory();
+	}
+
+	/*
+	 * If archiving is enabled, wait for all the required WAL files to be
+	 * archived before returning. If archiving isn't enabled, the required WAL
+	 * needs to be transported via streaming replication (hopefully with
+	 * wal_keep_size set high enough), or some more exotic mechanism like
+	 * polling and copying files from pg_wal with a script. We have no
+	 * knowledge of those mechanisms, so it's up to the user to ensure that
+	 * they get all the required WAL.
+	 *
+	 * We wait until both the last WAL file filled during backup and the
+	 * history file have been archived, and assume that the alphabetic sorting
+	 * property of the WAL files ensures any earlier WAL files are safely
+	 * archived as well.
+	 *
+	 * We wait forever, since archive_command is supposed to work and we
+	 * assume the admin wanted their backup to work completely. If you don't
+	 * wish to wait, then either waitforarchive should be passed in as false,
+	 * or you can set statement_timeout.  Also, some notices are issued to
+	 * clue in anyone who might be doing this interactively.
+	 */
+
+	if (waitforarchive &&
+		((!backup_stopped_in_recovery && XLogArchivingActive()) ||
+		 (backup_stopped_in_recovery && XLogArchivingAlways())))
+	{
+		XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
+		XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
+					 wal_segment_size);
+
+		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
+		BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
+							  state->startpoint, wal_segment_size);
+
+		seconds_before_warning = 60;
+		waits = 0;
+
+		while (XLogArchiveIsBusy(lastxlogfilename) ||
+			   XLogArchiveIsBusy(histfilename))
+		{
+			CHECK_FOR_INTERRUPTS();
+
+			if (!reported_waiting && waits > 5)
+			{
+				ereport(NOTICE,
+						(errmsg("base backup done, waiting for required WAL segments to be archived")));
+				reported_waiting = true;
+			}
+
+			(void) WaitLatch(MyLatch,
+							 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+							 1000L,
+							 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
+			ResetLatch(MyLatch);
+
+			if (++waits >= seconds_before_warning)
+			{
+				seconds_before_warning *= 2;	/* This wraps in >10 years... */
+				ereport(WARNING,
+						(errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
+								waits),
+						 errhint("Check that your archive_command is executing properly.  "
+								 "You can safely cancel this backup, "
+								 "but the database backup will not be usable without all the WAL segments.")));
+			}
+		}
+
+		ereport(NOTICE,
+				(errmsg("all required WAL segments have been archived")));
+	}
+	else if (waitforarchive)
+		ereport(NOTICE,
+				(errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
+}
+
+
+/*
+ * do_pg_abort_backup: abort a running backup
+ *
+ * This does just the most basic steps of do_pg_backup_stop(), by taking the
+ * system out of backup mode, thus making it a lot more safe to call from
+ * an error handler.
+ *
+ * 'arg' is a boolean Datum.  When true, we're being called during backup
+ * setup: sessionBackupState has not been modified yet, but runningBackups
+ * has already been incremented.  When it's false, we're invoked as a
+ * before_shmem_exit handler, and therefore we must not change state
+ * unless sessionBackupState indicates that a backup is actually running.
+ *
+ * NB: This gets used as a PG_ENSURE_ERROR_CLEANUP callback and
+ * before_shmem_exit handler, hence the odd-looking signature.
+ */
+void
+do_pg_abort_backup(int code, Datum arg)
+{
+	bool		during_backup_start = DatumGetBool(arg);
+
+	/* If called during backup start, no backup may be marked running yet */
+	Assert(!during_backup_start ||
+		   sessionBackupState == SESSION_BACKUP_NONE);
+
+	if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
+	{
+		XLogBackupResetRunning();
+
+		if (!during_backup_start)
+			ereport(WARNING,
+					errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
+	}
+}
+
+/*
+ * Register a handler that will warn about unterminated backups at end of
+ * session, unless this has already been done.
+ */
+void
+register_persistent_abort_backup_handler(void)
+{
+	static bool already_done = false;
+
+	if (already_done)
+		return;
+	before_shmem_exit(do_pg_abort_backup, BoolGetDatum(false));
+	already_done = true;
+}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index b46adca291..d9eff0bf9a 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -11,7 +11,6 @@
 #ifndef XLOG_H
 #define XLOG_H
 
-#include "access/xlogbackup.h"
 #include "access/xlogdefs.h"
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
@@ -258,35 +257,6 @@ extern void SetInstallXLogFileSegmentActive(void);
 extern bool IsInstallXLogFileSegmentActive(void);
 extern void XLogShutdownWalRcv(void);
 
-/*
- * Routines to start, stop, and get status of a base backup.
- */
-
-/*
- * Session-level status of base backups
- *
- * This is used in parallel with the shared memory status to control parallel
- * execution of base backup functions for a given session, be it a backend
- * dedicated to replication or a normal backend connected to a database. The
- * update of the session-level status happens at the same time as the shared
- * memory counters to keep a consistent global and local state of the backups
- * running.
- */
-typedef enum SessionBackupState
-{
-	SESSION_BACKUP_NONE,
-	SESSION_BACKUP_RUNNING,
-} SessionBackupState;
-
-extern void do_pg_backup_start(const char *backupidstr, bool fast,
-							   List **tablespaces, BackupState *state,
-							   StringInfo tblspcmapfile);
-extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
-extern void do_pg_abort_backup(int code, Datum arg);
-extern void register_persistent_abort_backup_handler(void);
-extern SessionBackupState get_backup_status(void);
-extern void reset_backup_status(void);
-
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_SIGNAL_FILE	"recovery.signal"
 #define STANDBY_SIGNAL_FILE		"standby.signal"
diff --git a/src/include/access/xlogbackup.h b/src/include/access/xlogbackup.h
index 8ec3d88b0a..8ca93754b7 100644
--- a/src/include/access/xlogbackup.h
+++ b/src/include/access/xlogbackup.h
@@ -15,6 +15,7 @@
 #define XLOG_BACKUP_H
 
 #include "access/xlogdefs.h"
+#include "nodes/pg_list.h"
 #include "pgtime.h"
 
 /* Structure to hold backup state. */
@@ -35,7 +36,35 @@ typedef struct BackupState
 	pg_time_t	stoptime;		/* backup stop time */
 } BackupState;
 
+/*
+ * Session-level status of base backups
+ *
+ * This is used in parallel with the shared memory status to control parallel
+ * execution of base backup functions for a given session, be it a backend
+ * dedicated to replication or a normal backend connected to a database. The
+ * update of the session-level status happens at the same time as the shared
+ * memory counters to keep a consistent global and local state of the backups
+ * running.
+ */
+typedef enum SessionBackupState
+{
+	SESSION_BACKUP_NONE,		/* initial state; also set on reset */
+	SESSION_BACKUP_RUNNING,		/* set once do_pg_backup_start() finishes */
+} SessionBackupState;
+
 extern char *build_backup_content(BackupState *state,
 								  bool ishistoryfile);
 
+/*
+ * Routines to start, stop, and get status of a base backup.
+ */
+extern void do_pg_backup_start(const char *backupidstr, bool fast,
+							   List **tablespaces, BackupState *state,
+							   StringInfo tblspcmapfile);
+extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
+extern void do_pg_abort_backup(int code, Datum arg);
+extern void register_persistent_abort_backup_handler(void);
+extern SessionBackupState get_backup_status(void);
+extern void reset_backup_status(void);
+
 #endif							/* XLOG_BACKUP_H */
-- 
2.34.1

Reply via email to