On Wed, Oct 5, 2022 at 1:20 PM Michael Paquier <mich...@paquier.xyz> wrote:
>
> On Tue, Oct 04, 2022 at 03:54:20PM -0700, Nathan Bossart wrote:
> > I would suggest moving this to a separate prerequisite patch that can be
> > reviewed independently from the patches that simply move code to a
> > different file.

I added the new functions in the 0001 patch for ease of review.

> And FWIW, the SQL interfaces for pg_backup_start() and
> pg_backup_stop() could stay in xlogfuncs.c.  This has the advantage to
> centralize in the same file all the SQL-function-specific checks.

Agreed.

+extern void WALInsertLockAcquire(void);
+extern void WALInsertLockAcquireExclusive(void);
+extern void WALInsertLockRelease(void);
+extern void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);

Note that I have moved the declarations of all the WAL insert lock
functions to xlog.h, even though xlogbackup.c uses only 2 of them. This
is done to keep all the functions together.

Please review the attached v2 patch set.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From 7a0359813790c5c3872b5308ff562371e6467266 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 5 Oct 2022 08:59:45 +0000
Subject: [PATCH v2] Add functions for xlogbackup.c to call back into xlog.c

---
 src/backend/access/transam/xlog.c | 119 ++++++++++++++++++++++++++++--
 src/include/access/xlog.h         |  17 +++++
 2 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 27085b15a8..604220b474 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -697,10 +697,6 @@ static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
 static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
 static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
 
-static void WALInsertLockAcquire(void);
-static void WALInsertLockAcquireExclusive(void);
-static void WALInsertLockRelease(void);
-static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
 
 /*
  * Insert an XLOG record represented by an already-constructed chain of data
@@ -1305,7 +1301,7 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
 /*
  * Acquire a WAL insertion lock, for inserting to WAL.
  */
-static void
+void
 WALInsertLockAcquire(void)
 {
 	bool		immed;
@@ -1350,7 +1346,7 @@ WALInsertLockAcquire(void)
  * Acquire all WAL insertion locks, to prevent other backends from inserting
  * to WAL.
  */
-static void
+void
 WALInsertLockAcquireExclusive(void)
 {
 	int			i;
@@ -1379,7 +1375,7 @@ WALInsertLockAcquireExclusive(void)
  * NB: Reset all variables to 0, so they cause LWLockWaitForVar to block the
  * next time the lock is acquired.
  */
-static void
+void
 WALInsertLockRelease(void)
 {
 	if (holdingAllLocks)
@@ -1405,7 +1401,7 @@ WALInsertLockRelease(void)
  * Update our insertingAt value, to let others know that we've finished
  * inserting up to that point.
  */
-static void
+void
 WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
 {
 	if (holdingAllLocks)
@@ -8983,3 +8979,110 @@ SetWalWriterSleeping(bool sleeping)
 	XLogCtl->WalWriterSleeping = sleeping;
 	SpinLockRelease(&XLogCtl->info_lck);
 }
+
+/*
+ * Return xlog.c's ControlFile pointer; no lock is taken by this function.
+ */
+ControlFileData *
+GetControlFile(void)
+{
+	return ControlFile;
+}
+
+/*
+ * Set Insert.forcePageWrites; if need_lock, take all WAL insertion locks.
+ */
+void
+SetforcePageWrites(bool need_lock, bool value)
+{
+	if (need_lock)
+		WALInsertLockAcquireExclusive();
+
+	XLogCtl->Insert.forcePageWrites = value;
+
+	if (need_lock)
+		WALInsertLockRelease();
+}
+
+/*
+ * Set Insert.runningBackups; if need_lock, take all WAL insertion locks.
+ */
+void
+SetrunningBackups(bool need_lock, int value)
+{
+	if (need_lock)
+		WALInsertLockAcquireExclusive();
+
+	XLogCtl->Insert.runningBackups = value;
+
+	if (need_lock)
+		WALInsertLockRelease();
+}
+
+/*
+ * Read Insert.runningBackups; if need_lock, take all WAL insertion locks.
+ */
+int
+GetrunningBackups(bool need_lock)
+{
+	int value;
+
+	if (need_lock)
+		WALInsertLockAcquireExclusive();
+
+	value = XLogCtl->Insert.runningBackups;
+
+	if (need_lock)
+		WALInsertLockRelease();
+
+	return value;
+}
+
+/*
+ * Get the lastFpwDisableRecPtr, reading it under the info_lck spinlock.
+ */
+XLogRecPtr
+GetlastFpwDisableRecPtr(void)
+{
+	XLogRecPtr recptr;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	recptr = XLogCtl->lastFpwDisableRecPtr;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return recptr;
+}
+
+/*
+ * Read Insert.lastBackupStart; if need_lock, take all WAL insertion locks.
+ */
+XLogRecPtr
+GetlastBackupStart(bool need_lock)
+{
+	XLogRecPtr recptr;
+
+	if (need_lock)
+		WALInsertLockAcquireExclusive();
+
+	recptr = XLogCtl->Insert.lastBackupStart;
+
+	if (need_lock)
+		WALInsertLockRelease();
+
+	return recptr;
+}
+
+/*
+ * Set Insert.lastBackupStart; if need_lock, take all WAL insertion locks.
+ */
+void
+SetlastBackupStart(bool need_lock, XLogRecPtr recptr)
+{
+	if (need_lock)
+		WALInsertLockAcquireExclusive();
+
+	XLogCtl->Insert.lastBackupStart = recptr;
+
+	if (need_lock)
+		WALInsertLockRelease();
+}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index dce265098e..8109209eae 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -14,6 +14,7 @@
 #include "access/xlogbackup.h"
 #include "access/xlogdefs.h"
 #include "access/xlogreader.h"
+#include "catalog/pg_control.h"
 #include "datatype/timestamp.h"
 #include "lib/stringinfo.h"
 #include "nodes/pg_list.h"
@@ -258,6 +259,22 @@ extern void SetInstallXLogFileSegmentActive(void);
 extern bool IsInstallXLogFileSegmentActive(void);
 extern void XLogShutdownWalRcv(void);
 
+extern void WALInsertLockAcquire(void);
+extern void WALInsertLockAcquireExclusive(void);
+extern void WALInsertLockRelease(void);
+extern void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
+
+/*
+ * Routines used by xlogbackup.c to call back into xlog.c during backup.
+ */
+extern ControlFileData *GetControlFile(void);
+extern void SetforcePageWrites(bool need_lock, bool value);
+extern void SetrunningBackups(bool need_lock, int value);
+extern int GetrunningBackups(bool need_lock);
+extern XLogRecPtr GetlastFpwDisableRecPtr(void);
+extern XLogRecPtr GetlastBackupStart(bool need_lock);
+extern void SetlastBackupStart(bool need_lock, XLogRecPtr recptr);
+
 /*
  * Routines to start, stop, and get status of a base backup.
  */
-- 
2.34.1

From 67d17054c4a1f436f7a03d3dab9e9a9c33ff4200 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Wed, 5 Oct 2022 09:07:18 +0000
Subject: [PATCH v2] Move backup-related code from xlog.c to xlogbackup.c

---
 src/backend/access/transam/xlog.c         | 706 +--------------------
 src/backend/access/transam/xlogbackup.c   | 719 ++++++++++++++++++++++
 src/backend/access/transam/xlogrecovery.c |   1 +
 src/include/access/xlog.h                 |  33 -
 src/include/access/xlogbackup.h           |  34 +
 5 files changed, 756 insertions(+), 737 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 604220b474..84175111b8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -58,19 +58,19 @@
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 #include "access/xlogarchive.h"
+#include "access/xlogbackup.h"
 #include "access/xloginsert.h"
 #include "access/xlogprefetcher.h"
 #include "access/xlogreader.h"
 #include "access/xlogrecovery.h"
 #include "access/xlogutils.h"
-#include "backup/basebackup.h"
 #include "catalog/catversion.h"
-#include "catalog/pg_control.h"
 #include "catalog/pg_database.h"
 #include "common/controldata_utils.h"
 #include "common/file_utils.h"
 #include "executor/instrument.h"
 #include "miscadmin.h"
+#include "nodes/pg_list.h"
 #include "pg_trace.h"
 #include "pgstat.h"
 #include "port/atomics.h"
@@ -86,7 +86,6 @@
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
-#include "storage/ipc.h"
 #include "storage/large_object.h"
 #include "storage/latch.h"
 #include "storage/pmsignal.h"
@@ -393,12 +392,6 @@ typedef union WALInsertLockPadded
 	char		pad[PG_CACHE_LINE_SIZE];
 } WALInsertLockPadded;
 
-/*
- * Session status of running backup, used for sanity checks in SQL-callable
- * functions to start and stop backups.
- */
-static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE;
-
 /*
  * Shared state data for WAL insertion.
  */
@@ -670,7 +663,6 @@ static void RemoveXlogFile(const struct dirent *segment_de,
 						   TimeLineID insertTLI);
 static void UpdateLastRemovedPtr(char *filename);
 static void ValidateXLOGDirectoryStructure(void);
-static void CleanupBackupHistory(void);
 static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
 static bool PerformRecoveryXLogAction(void);
 static void InitControlFile(uint64 sysidentifier);
@@ -679,8 +671,6 @@ static void ReadControlFile(void);
 static void UpdateControlFile(void);
 static char *str_time(pg_time_t tnow);
 
-static void pg_backup_start_callback(int code, Datum arg);
-
 static int	get_sync_bit(int method);
 
 static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch,
@@ -3817,38 +3807,6 @@ ValidateXLOGDirectoryStructure(void)
 	}
 }
 
-/*
- * Remove previous backup history files.  This also retries creation of
- * .ready files for any backup history files for which XLogArchiveNotify
- * failed earlier.
- */
-static void
-CleanupBackupHistory(void)
-{
-	DIR		   *xldir;
-	struct dirent *xlde;
-	char		path[MAXPGPATH + sizeof(XLOGDIR)];
-
-	xldir = AllocateDir(XLOGDIR);
-
-	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
-	{
-		if (IsBackupHistoryFileName(xlde->d_name))
-		{
-			if (XLogArchiveCheckDone(xlde->d_name))
-			{
-				elog(DEBUG2, "removing WAL backup history file \"%s\"",
-					 xlde->d_name);
-				snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
-				unlink(path);
-				XLogArchiveCleanup(xlde->d_name);
-			}
-		}
-	}
-
-	FreeDir(xldir);
-}
-
 /*
  * I/O routines for pg_control
  *
@@ -8235,666 +8193,6 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	PendingWalStats.wal_sync++;
 }
 
-/*
- * do_pg_backup_start is the workhorse of the user-visible pg_backup_start()
- * function. It creates the necessary starting checkpoint and constructs the
- * backup state and tablespace map.
- *
- * Input parameters are "state" (the backup state), "fast" (if true, we do
- * the checkpoint in immediate mode to make it faster), and "tablespaces"
- * (if non-NULL, indicates a list of tablespaceinfo structs describing the
- * cluster's tablespaces.).
- *
- * The tablespace map contents are appended to passed-in parameter
- * tablespace_map and the caller is responsible for including it in the backup
- * archive as 'tablespace_map'. The tablespace_map file is required mainly for
- * tar format in windows as native windows utilities are not able to create
- * symlinks while extracting files from tar. However for consistency and
- * platform-independence, we do it the same way everywhere.
- *
- * It fills in "state" with the information required for the backup, such
- * as the minimum WAL location that must be present to restore from this
- * backup (starttli) and the corresponding timeline ID (starttli).
- *
- * Every successfully started backup must be stopped by calling
- * do_pg_backup_stop() or do_pg_abort_backup(). There can be many
- * backups active at the same time.
- *
- * It is the responsibility of the caller of this function to verify the
- * permissions of the calling user!
- */
-void
-do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
-				   BackupState *state, StringInfo tblspcmapfile)
-{
-	bool		backup_started_in_recovery = false;
-
-	Assert(state != NULL);
-	backup_started_in_recovery = RecoveryInProgress();
-
-	/*
-	 * During recovery, we don't need to check WAL level. Because, if WAL
-	 * level is not sufficient, it's impossible to get here during recovery.
-	 */
-	if (!backup_started_in_recovery && !XLogIsNeeded())
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("WAL level not sufficient for making an online backup"),
-				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
-
-	if (strlen(backupidstr) > MAXPGPATH)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("backup label too long (max %d bytes)",
-						MAXPGPATH)));
-
-	memcpy(state->name, backupidstr, strlen(backupidstr));
-
-	/*
-	 * Mark backup active in shared memory.  We must do full-page WAL writes
-	 * during an on-line backup even if not doing so at other times, because
-	 * it's quite possible for the backup dump to obtain a "torn" (partially
-	 * written) copy of a database page if it reads the page concurrently with
-	 * our write to the same page.  This can be fixed as long as the first
-	 * write to the page in the WAL sequence is a full-page write. Hence, we
-	 * turn on forcePageWrites and then force a CHECKPOINT, to ensure there
-	 * are no dirty pages in shared memory that might get dumped while the
-	 * backup is in progress without having a corresponding WAL record.  (Once
-	 * the backup is complete, we need not force full-page writes anymore,
-	 * since we expect that any pages not modified during the backup interval
-	 * must have been correctly captured by the backup.)
-	 *
-	 * Note that forcePageWrites has no effect during an online backup from
-	 * the standby.
-	 *
-	 * We must hold all the insertion locks to change the value of
-	 * forcePageWrites, to ensure adequate interlocking against
-	 * XLogInsertRecord().
-	 */
-	WALInsertLockAcquireExclusive();
-	XLogCtl->Insert.runningBackups++;
-	XLogCtl->Insert.forcePageWrites = true;
-	WALInsertLockRelease();
-
-	/* Ensure we release forcePageWrites if fail below */
-	PG_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0);
-	{
-		bool		gotUniqueStartpoint = false;
-		DIR		   *tblspcdir;
-		struct dirent *de;
-		tablespaceinfo *ti;
-		int			datadirpathlen;
-
-		/*
-		 * Force an XLOG file switch before the checkpoint, to ensure that the
-		 * WAL segment the checkpoint is written to doesn't contain pages with
-		 * old timeline IDs.  That would otherwise happen if you called
-		 * pg_backup_start() right after restoring from a PITR archive: the
-		 * first WAL segment containing the startup checkpoint has pages in
-		 * the beginning with the old timeline ID.  That can cause trouble at
-		 * recovery: we won't have a history file covering the old timeline if
-		 * pg_wal directory was not included in the base backup and the WAL
-		 * archive was cleared too before starting the backup.
-		 *
-		 * This also ensures that we have emitted a WAL page header that has
-		 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
-		 * Therefore, if a WAL archiver (such as pglesslog) is trying to
-		 * compress out removable backup blocks, it won't remove any that
-		 * occur after this point.
-		 *
-		 * During recovery, we skip forcing XLOG file switch, which means that
-		 * the backup taken during recovery is not available for the special
-		 * recovery case described above.
-		 */
-		if (!backup_started_in_recovery)
-			RequestXLogSwitch(false);
-
-		do
-		{
-			bool		checkpointfpw;
-
-			/*
-			 * Force a CHECKPOINT.  Aside from being necessary to prevent torn
-			 * page problems, this guarantees that two successive backup runs
-			 * will have different checkpoint positions and hence different
-			 * history file names, even if nothing happened in between.
-			 *
-			 * During recovery, establish a restartpoint if possible. We use
-			 * the last restartpoint as the backup starting checkpoint. This
-			 * means that two successive backup runs can have same checkpoint
-			 * positions.
-			 *
-			 * Since the fact that we are executing do_pg_backup_start()
-			 * during recovery means that checkpointer is running, we can use
-			 * RequestCheckpoint() to establish a restartpoint.
-			 *
-			 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
-			 * passing fast = true).  Otherwise this can take awhile.
-			 */
-			RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
-							  (fast ? CHECKPOINT_IMMEDIATE : 0));
-
-			/*
-			 * Now we need to fetch the checkpoint record location, and also
-			 * its REDO pointer.  The oldest point in WAL that would be needed
-			 * to restore starting from the checkpoint is precisely the REDO
-			 * pointer.
-			 */
-			LWLockAcquire(ControlFileLock, LW_SHARED);
-			state->checkpointloc = ControlFile->checkPoint;
-			state->startpoint = ControlFile->checkPointCopy.redo;
-			state->starttli = ControlFile->checkPointCopy.ThisTimeLineID;
-			checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
-			LWLockRelease(ControlFileLock);
-
-			if (backup_started_in_recovery)
-			{
-				XLogRecPtr	recptr;
-
-				/*
-				 * Check to see if all WAL replayed during online backup
-				 * (i.e., since last restartpoint used as backup starting
-				 * checkpoint) contain full-page writes.
-				 */
-				SpinLockAcquire(&XLogCtl->info_lck);
-				recptr = XLogCtl->lastFpwDisableRecPtr;
-				SpinLockRelease(&XLogCtl->info_lck);
-
-				if (!checkpointfpw || state->startpoint <= recptr)
-					ereport(ERROR,
-							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-							 errmsg("WAL generated with full_page_writes=off was replayed "
-									"since last restartpoint"),
-							 errhint("This means that the backup being taken on the standby "
-									 "is corrupt and should not be used. "
-									 "Enable full_page_writes and run CHECKPOINT on the primary, "
-									 "and then try an online backup again.")));
-
-				/*
-				 * During recovery, since we don't use the end-of-backup WAL
-				 * record and don't write the backup history file, the
-				 * starting WAL location doesn't need to be unique. This means
-				 * that two base backups started at the same time might use
-				 * the same checkpoint as starting locations.
-				 */
-				gotUniqueStartpoint = true;
-			}
-
-			/*
-			 * If two base backups are started at the same time (in WAL sender
-			 * processes), we need to make sure that they use different
-			 * checkpoints as starting locations, because we use the starting
-			 * WAL location as a unique identifier for the base backup in the
-			 * end-of-backup WAL record and when we write the backup history
-			 * file. Perhaps it would be better generate a separate unique ID
-			 * for each backup instead of forcing another checkpoint, but
-			 * taking a checkpoint right after another is not that expensive
-			 * either because only few buffers have been dirtied yet.
-			 */
-			WALInsertLockAcquireExclusive();
-			if (XLogCtl->Insert.lastBackupStart < state->startpoint)
-			{
-				XLogCtl->Insert.lastBackupStart = state->startpoint;
-				gotUniqueStartpoint = true;
-			}
-			WALInsertLockRelease();
-		} while (!gotUniqueStartpoint);
-
-		/*
-		 * Construct tablespace_map file.
-		 */
-		datadirpathlen = strlen(DataDir);
-
-		/* Collect information about all tablespaces */
-		tblspcdir = AllocateDir("pg_tblspc");
-		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
-		{
-			char		fullpath[MAXPGPATH + 10];
-			char		linkpath[MAXPGPATH];
-			char	   *relpath = NULL;
-			int			rllen;
-			StringInfoData escapedpath;
-			char	   *s;
-
-			/* Skip anything that doesn't look like a tablespace */
-			if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
-				continue;
-
-			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
-
-			/*
-			 * Skip anything that isn't a symlink/junction.  For testing only,
-			 * we sometimes use allow_in_place_tablespaces to create
-			 * directories directly under pg_tblspc, which would fail below.
-			 */
-			if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK)
-				continue;
-
-			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
-			if (rllen < 0)
-			{
-				ereport(WARNING,
-						(errmsg("could not read symbolic link \"%s\": %m",
-								fullpath)));
-				continue;
-			}
-			else if (rllen >= sizeof(linkpath))
-			{
-				ereport(WARNING,
-						(errmsg("symbolic link \"%s\" target is too long",
-								fullpath)));
-				continue;
-			}
-			linkpath[rllen] = '\0';
-
-			/*
-			 * Build a backslash-escaped version of the link path to include
-			 * in the tablespace map file.
-			 */
-			initStringInfo(&escapedpath);
-			for (s = linkpath; *s; s++)
-			{
-				if (*s == '\n' || *s == '\r' || *s == '\\')
-					appendStringInfoChar(&escapedpath, '\\');
-				appendStringInfoChar(&escapedpath, *s);
-			}
-
-			/*
-			 * Relpath holds the relative path of the tablespace directory
-			 * when it's located within PGDATA, or NULL if it's located
-			 * elsewhere.
-			 */
-			if (rllen > datadirpathlen &&
-				strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
-				IS_DIR_SEP(linkpath[datadirpathlen]))
-				relpath = linkpath + datadirpathlen + 1;
-
-			ti = palloc(sizeof(tablespaceinfo));
-			ti->oid = pstrdup(de->d_name);
-			ti->path = pstrdup(linkpath);
-			ti->rpath = relpath ? pstrdup(relpath) : NULL;
-			ti->size = -1;
-
-			if (tablespaces)
-				*tablespaces = lappend(*tablespaces, ti);
-
-			appendStringInfo(tblspcmapfile, "%s %s\n",
-							 ti->oid, escapedpath.data);
-
-			pfree(escapedpath.data);
-		}
-		FreeDir(tblspcdir);
-
-		state->starttime = (pg_time_t) time(NULL);
-	}
-	PG_END_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0);
-
-	state->started_in_recovery = backup_started_in_recovery;
-
-	/*
-	 * Mark that the start phase has correctly finished for the backup.
-	 */
-	sessionBackupState = SESSION_BACKUP_RUNNING;
-}
-
-/* Error cleanup callback for pg_backup_start */
-static void
-pg_backup_start_callback(int code, Datum arg)
-{
-	/* Update backup counters and forcePageWrites on failure */
-	WALInsertLockAcquireExclusive();
-
-	Assert(XLogCtl->Insert.runningBackups > 0);
-	XLogCtl->Insert.runningBackups--;
-
-	if (XLogCtl->Insert.runningBackups == 0)
-	{
-		XLogCtl->Insert.forcePageWrites = false;
-	}
-	WALInsertLockRelease();
-}
-
-/*
- * Utility routine to fetch the session-level status of a backup running.
- */
-SessionBackupState
-get_backup_status(void)
-{
-	return sessionBackupState;
-}
-
-/*
- * do_pg_backup_stop
- *
- * Utility function called at the end of an online backup.  It creates history
- * file (if required), resets sessionBackupState and so on.  It can optionally
- * wait for WAL segments to be archived.
- *
- * "state" is filled with the information necessary to restore from this
- * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc.
- *
- * It is the responsibility of the caller of this function to verify the
- * permissions of the calling user!
- */
-void
-do_pg_backup_stop(BackupState *state, bool waitforarchive)
-{
-	bool		backup_stopped_in_recovery = false;
-	char		histfilepath[MAXPGPATH];
-	char		lastxlogfilename[MAXFNAMELEN];
-	char		histfilename[MAXFNAMELEN];
-	XLogSegNo	_logSegNo;
-	FILE	   *fp;
-	int			seconds_before_warning;
-	int			waits = 0;
-	bool		reported_waiting = false;
-
-	Assert(state != NULL);
-
-	backup_stopped_in_recovery = RecoveryInProgress();
-
-	/*
-	 * During recovery, we don't need to check WAL level. Because, if WAL
-	 * level is not sufficient, it's impossible to get here during recovery.
-	 */
-	if (!backup_stopped_in_recovery && !XLogIsNeeded())
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("WAL level not sufficient for making an online backup"),
-				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
-
-	/*
-	 * OK to update backup counters, forcePageWrites, and session-level lock.
-	 *
-	 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them.
-	 * Otherwise they can be updated inconsistently, and which might cause
-	 * do_pg_abort_backup() to fail.
-	 */
-	WALInsertLockAcquireExclusive();
-
-	/*
-	 * It is expected that each do_pg_backup_start() call is matched by
-	 * exactly one do_pg_backup_stop() call.
-	 */
-	Assert(XLogCtl->Insert.runningBackups > 0);
-	XLogCtl->Insert.runningBackups--;
-
-	if (XLogCtl->Insert.runningBackups == 0)
-	{
-		XLogCtl->Insert.forcePageWrites = false;
-	}
-
-	/*
-	 * Clean up session-level lock.
-	 *
-	 * You might think that WALInsertLockRelease() can be called before
-	 * cleaning up session-level lock because session-level lock doesn't need
-	 * to be protected with WAL insertion lock. But since
-	 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
-	 * cleaned up before it.
-	 */
-	sessionBackupState = SESSION_BACKUP_NONE;
-
-	WALInsertLockRelease();
-
-	/*
-	 * If we are taking an online backup from the standby, we confirm that the
-	 * standby has not been promoted during the backup.
-	 */
-	if (state->started_in_recovery && !backup_stopped_in_recovery)
-		ereport(ERROR,
-				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("the standby was promoted during online backup"),
-				 errhint("This means that the backup being taken is corrupt "
-						 "and should not be used. "
-						 "Try taking another online backup.")));
-
-	/*
-	 * During recovery, we don't write an end-of-backup record. We assume that
-	 * pg_control was backed up last and its minimum recovery point can be
-	 * available as the backup end location. Since we don't have an
-	 * end-of-backup record, we use the pg_control value to check whether
-	 * we've reached the end of backup when starting recovery from this
-	 * backup. We have no way of checking if pg_control wasn't backed up last
-	 * however.
-	 *
-	 * We don't force a switch to new WAL file but it is still possible to
-	 * wait for all the required files to be archived if waitforarchive is
-	 * true. This is okay if we use the backup to start a standby and fetch
-	 * the missing WAL using streaming replication. But in the case of an
-	 * archive recovery, a user should set waitforarchive to true and wait for
-	 * them to be archived to ensure that all the required files are
-	 * available.
-	 *
-	 * We return the current minimum recovery point as the backup end
-	 * location. Note that it can be greater than the exact backup end
-	 * location if the minimum recovery point is updated after the backup of
-	 * pg_control. This is harmless for current uses.
-	 *
-	 * XXX currently a backup history file is for informational and debug
-	 * purposes only. It's not essential for an online backup. Furthermore,
-	 * even if it's created, it will not be archived during recovery because
-	 * an archiver is not invoked. So it doesn't seem worthwhile to write a
-	 * backup history file during recovery.
-	 */
-	if (backup_stopped_in_recovery)
-	{
-		XLogRecPtr	recptr;
-
-		/*
-		 * Check to see if all WAL replayed during online backup contain
-		 * full-page writes.
-		 */
-		SpinLockAcquire(&XLogCtl->info_lck);
-		recptr = XLogCtl->lastFpwDisableRecPtr;
-		SpinLockRelease(&XLogCtl->info_lck);
-
-		if (state->startpoint <= recptr)
-			ereport(ERROR,
-					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					 errmsg("WAL generated with full_page_writes=off was replayed "
-							"during online backup"),
-					 errhint("This means that the backup being taken on the standby "
-							 "is corrupt and should not be used. "
-							 "Enable full_page_writes and run CHECKPOINT on the primary, "
-							 "and then try an online backup again.")));
-
-
-		LWLockAcquire(ControlFileLock, LW_SHARED);
-		state->stoppoint = ControlFile->minRecoveryPoint;
-		state->stoptli = ControlFile->minRecoveryPointTLI;
-		LWLockRelease(ControlFileLock);
-	}
-	else
-	{
-		char	   *history_file;
-
-		/*
-		 * Write the backup-end xlog record
-		 */
-		XLogBeginInsert();
-		XLogRegisterData((char *) (&state->startpoint),
-						 sizeof(state->startpoint));
-		state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
-
-		/*
-		 * Given that we're not in recovery, InsertTimeLineID is set and can't
-		 * change, so we can read it without a lock.
-		 */
-		state->stoptli = XLogCtl->InsertTimeLineID;
-
-		/*
-		 * Force a switch to a new xlog segment file, so that the backup is
-		 * valid as soon as archiver moves out the current segment file.
-		 */
-		RequestXLogSwitch(false);
-
-		state->stoptime = (pg_time_t) time(NULL);
-
-		/*
-		 * Write the backup history file
-		 */
-		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
-		BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
-							  state->startpoint, wal_segment_size);
-		fp = AllocateFile(histfilepath, "w");
-		if (!fp)
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not create file \"%s\": %m",
-							histfilepath)));
-
-		/* Build and save the contents of the backup history file */
-		history_file = build_backup_content(state, true);
-		fprintf(fp, "%s", history_file);
-		pfree(history_file);
-
-		if (fflush(fp) || ferror(fp) || FreeFile(fp))
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not write file \"%s\": %m",
-							histfilepath)));
-
-		/*
-		 * Clean out any no-longer-needed history files.  As a side effect,
-		 * this will post a .ready file for the newly created history file,
-		 * notifying the archiver that history file may be archived
-		 * immediately.
-		 */
-		CleanupBackupHistory();
-	}
-
-	/*
-	 * If archiving is enabled, wait for all the required WAL files to be
-	 * archived before returning. If archiving isn't enabled, the required WAL
-	 * needs to be transported via streaming replication (hopefully with
-	 * wal_keep_size set high enough), or some more exotic mechanism like
-	 * polling and copying files from pg_wal with script. We have no knowledge
-	 * of those mechanisms, so it's up to the user to ensure that he gets all
-	 * the required WAL.
-	 *
-	 * We wait until both the last WAL file filled during backup and the
-	 * history file have been archived, and assume that the alphabetic sorting
-	 * property of the WAL files ensures any earlier WAL files are safely
-	 * archived as well.
-	 *
-	 * We wait forever, since archive_command is supposed to work and we
-	 * assume the admin wanted his backup to work completely. If you don't
-	 * wish to wait, then either waitforarchive should be passed in as false,
-	 * or you can set statement_timeout.  Also, some notices are issued to
-	 * clue in anyone who might be doing this interactively.
-	 */
-
-	if (waitforarchive &&
-		((!backup_stopped_in_recovery && XLogArchivingActive()) ||
-		 (backup_stopped_in_recovery && XLogArchivingAlways())))
-	{
-		XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
-		XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
-					 wal_segment_size);
-
-		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
-		BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
-							  state->startpoint, wal_segment_size);
-
-		seconds_before_warning = 60;
-		waits = 0;
-
-		while (XLogArchiveIsBusy(lastxlogfilename) ||
-			   XLogArchiveIsBusy(histfilename))
-		{
-			CHECK_FOR_INTERRUPTS();
-
-			if (!reported_waiting && waits > 5)
-			{
-				ereport(NOTICE,
-						(errmsg("base backup done, waiting for required WAL segments to be archived")));
-				reported_waiting = true;
-			}
-
-			(void) WaitLatch(MyLatch,
-							 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
-							 1000L,
-							 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
-			ResetLatch(MyLatch);
-
-			if (++waits >= seconds_before_warning)
-			{
-				seconds_before_warning *= 2;	/* This wraps in >10 years... */
-				ereport(WARNING,
-						(errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
-								waits),
-						 errhint("Check that your archive_command is executing properly.  "
-								 "You can safely cancel this backup, "
-								 "but the database backup will not be usable without all the WAL segments.")));
-			}
-		}
-
-		ereport(NOTICE,
-				(errmsg("all required WAL segments have been archived")));
-	}
-	else if (waitforarchive)
-		ereport(NOTICE,
-				(errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
-}
-
-
-/*
- * do_pg_abort_backup: abort a running backup
- *
- * This does just the most basic steps of do_pg_backup_stop(), by taking the
- * system out of backup mode, thus making it a lot more safe to call from
- * an error handler.
- *
- * The caller can pass 'arg' as 'true' or 'false' to control whether a warning
- * is emitted.
- *
- * NB: This gets used as a before_shmem_exit handler, hence the odd-looking
- * signature.
- */
-void
-do_pg_abort_backup(int code, Datum arg)
-{
-	bool		emit_warning = DatumGetBool(arg);
-
-	/*
-	 * Quick exit if session does not have a running backup.
-	 */
-	if (sessionBackupState != SESSION_BACKUP_RUNNING)
-		return;
-
-	WALInsertLockAcquireExclusive();
-	Assert(XLogCtl->Insert.runningBackups > 0);
-	XLogCtl->Insert.runningBackups--;
-
-	if (XLogCtl->Insert.runningBackups == 0)
-	{
-		XLogCtl->Insert.forcePageWrites = false;
-	}
-
-	sessionBackupState = SESSION_BACKUP_NONE;
-	WALInsertLockRelease();
-
-	if (emit_warning)
-		ereport(WARNING,
-				(errmsg("aborting backup due to backend exiting before pg_backup_stop was called")));
-}
-
-/*
- * Register a handler that will warn about unterminated backups at end of
- * session, unless this has already been done.
- */
-void
-register_persistent_abort_backup_handler(void)
-{
-	static bool already_done = false;
-
-	if (already_done)
-		return;
-	before_shmem_exit(do_pg_abort_backup, DatumGetBool(true));
-	already_done = true;
-}
-
 /*
  * Get latest WAL insert pointer
  */
diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c
index 90b5273b02..62cea966a5 100644
--- a/src/backend/access/transam/xlogbackup.c
+++ b/src/backend/access/transam/xlogbackup.c
@@ -13,9 +13,32 @@
 
 #include "postgres.h"
 
+#include <time.h>
+#include <unistd.h>
+
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
+#include "access/xlogarchive.h"
 #include "access/xlogbackup.h"
+#include "access/xloginsert.h"
+#include "backup/basebackup.h"
+#include "common/file_utils.h"
+#include "miscadmin.h"
+#include "postmaster/bgwriter.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lwlock.h"
+#include "utils/wait_event.h"
+
+/*
+ * Session status of running backup, used for sanity checks in SQL-callable
+ * functions to start and stop backups.
+ */
+static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE;
+
+static void pg_backup_start_callback(int code, Datum arg);
+static void CleanupBackupHistory(void);
 
 /*
  * Build contents for backup_label or backup history file.
@@ -82,3 +105,699 @@ build_backup_content(BackupState *state, bool ishistoryfile)
 
 	return data;
 }
+
+/*
+ * Remove previous backup history files.  This also retries creation of
+ * .ready files for any backup history files for which XLogArchiveNotify
+ * failed earlier.
+ */
+static void
+CleanupBackupHistory(void)
+{
+	DIR		   *xldir;
+	struct dirent *xlde;
+	char		path[MAXPGPATH + sizeof(XLOGDIR)];
+
+	xldir = AllocateDir(XLOGDIR);
+
+	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
+	{
+		if (IsBackupHistoryFileName(xlde->d_name))
+		{
+			if (XLogArchiveCheckDone(xlde->d_name))
+			{
+				elog(DEBUG2, "removing WAL backup history file \"%s\"",
+					 xlde->d_name);
+				snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
+				unlink(path);
+				XLogArchiveCleanup(xlde->d_name);
+			}
+		}
+	}
+
+	FreeDir(xldir);
+}
+
+/*
+ * do_pg_backup_start is the workhorse of the user-visible pg_backup_start()
+ * function. It creates the necessary starting checkpoint and constructs the
+ * backup state and tablespace map.
+ *
+ * Input parameters are "state" (the backup state), "fast" (if true, we do
+ * the checkpoint in immediate mode to make it faster), and "tablespaces"
+ * (if non-NULL, indicates a list of tablespaceinfo structs describing the
+ * cluster's tablespaces).
+ *
+ * The tablespace map contents are appended to passed-in parameter
+ * tblspcmapfile and the caller is responsible for including it in the backup
+ * archive as 'tablespace_map'. The tablespace_map file is required mainly for
+ * tar format in windows as native windows utilities are not able to create
+ * symlinks while extracting files from tar. However for consistency and
+ * platform-independence, we do it the same way everywhere.
+ *
+ * It fills in "state" with the information required for the backup, such
+ * as the minimum WAL location that must be present to restore from this
+ * backup (startpoint) and the corresponding timeline ID (starttli).
+ *
+ * Every successfully started backup must be stopped by calling
+ * do_pg_backup_stop() or do_pg_abort_backup(). There can be many
+ * backups active at the same time.
+ *
+ * It is the responsibility of the caller of this function to verify the
+ * permissions of the calling user!
+ */
+void
+do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
+				   BackupState *state, StringInfo tblspcmapfile)
+{
+	bool		backup_started_in_recovery = false;
+	int 		runningBackups;
+	ControlFileData	*ControlFile = GetControlFile();
+
+	Assert(state != NULL);
+	backup_started_in_recovery = RecoveryInProgress();
+
+	/*
+	 * During recovery, we don't need to check WAL level. Because, if WAL
+	 * level is not sufficient, it's impossible to get here during recovery.
+	 */
+	if (!backup_started_in_recovery && !XLogIsNeeded())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("WAL level not sufficient for making an online backup"),
+				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
+
+	if (strlen(backupidstr) > MAXPGPATH)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("backup label too long (max %d bytes)",
+						MAXPGPATH)));
+
+	memcpy(state->name, backupidstr, strlen(backupidstr));
+
+	/*
+	 * Mark backup active in shared memory.  We must do full-page WAL writes
+	 * during an on-line backup even if not doing so at other times, because
+	 * it's quite possible for the backup dump to obtain a "torn" (partially
+	 * written) copy of a database page if it reads the page concurrently with
+	 * our write to the same page.  This can be fixed as long as the first
+	 * write to the page in the WAL sequence is a full-page write. Hence, we
+	 * turn on forcePageWrites and then force a CHECKPOINT, to ensure there
+	 * are no dirty pages in shared memory that might get dumped while the
+	 * backup is in progress without having a corresponding WAL record.  (Once
+	 * the backup is complete, we need not force full-page writes anymore,
+	 * since we expect that any pages not modified during the backup interval
+	 * must have been correctly captured by the backup.)
+	 *
+	 * Note that forcePageWrites has no effect during an online backup from
+	 * the standby.
+	 *
+	 * We must hold all the insertion locks to change the value of
+	 * forcePageWrites, to ensure adequate interlocking against
+	 * XLogInsertRecord().
+	 */
+	WALInsertLockAcquireExclusive();
+	runningBackups = GetrunningBackups(false);
+	SetrunningBackups(false, ++runningBackups);
+	SetforcePageWrites(false, true);
+	WALInsertLockRelease();
+
+	/* Ensure we release forcePageWrites if we fail below */
+	PG_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0);
+	{
+		bool		gotUniqueStartpoint = false;
+		DIR		   *tblspcdir;
+		struct dirent *de;
+		tablespaceinfo *ti;
+		int			datadirpathlen;
+
+		/*
+		 * Force an XLOG file switch before the checkpoint, to ensure that the
+		 * WAL segment the checkpoint is written to doesn't contain pages with
+		 * old timeline IDs.  That would otherwise happen if you called
+		 * pg_backup_start() right after restoring from a PITR archive: the
+		 * first WAL segment containing the startup checkpoint has pages in
+		 * the beginning with the old timeline ID.  That can cause trouble at
+		 * recovery: we won't have a history file covering the old timeline if
+		 * pg_wal directory was not included in the base backup and the WAL
+		 * archive was cleared too before starting the backup.
+		 *
+		 * This also ensures that we have emitted a WAL page header that has
+		 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
+		 * Therefore, if a WAL archiver (such as pglesslog) is trying to
+		 * compress out removable backup blocks, it won't remove any that
+		 * occur after this point.
+		 *
+		 * During recovery, we skip forcing XLOG file switch, which means that
+		 * the backup taken during recovery is not available for the special
+		 * recovery case described above.
+		 */
+		if (!backup_started_in_recovery)
+			RequestXLogSwitch(false);
+
+		do
+		{
+			bool		checkpointfpw;
+
+			/*
+			 * Force a CHECKPOINT.  Aside from being necessary to prevent torn
+			 * page problems, this guarantees that two successive backup runs
+			 * will have different checkpoint positions and hence different
+			 * history file names, even if nothing happened in between.
+			 *
+			 * During recovery, establish a restartpoint if possible. We use
+			 * the last restartpoint as the backup starting checkpoint. This
+			 * means that two successive backup runs can have same checkpoint
+			 * positions.
+			 *
+			 * Since the fact that we are executing do_pg_backup_start()
+			 * during recovery means that checkpointer is running, we can use
+			 * RequestCheckpoint() to establish a restartpoint.
+			 *
+			 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
+			 * passing fast = true).  Otherwise this can take awhile.
+			 */
+			RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
+							  (fast ? CHECKPOINT_IMMEDIATE : 0));
+
+			/*
+			 * Now we need to fetch the checkpoint record location, and also
+			 * its REDO pointer.  The oldest point in WAL that would be needed
+			 * to restore starting from the checkpoint is precisely the REDO
+			 * pointer.
+			 */
+			LWLockAcquire(ControlFileLock, LW_SHARED);
+			state->checkpointloc = ControlFile->checkPoint;
+			state->startpoint = ControlFile->checkPointCopy.redo;
+			state->starttli = ControlFile->checkPointCopy.ThisTimeLineID;
+			checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
+			LWLockRelease(ControlFileLock);
+
+			if (backup_started_in_recovery)
+			{
+				XLogRecPtr	recptr;
+
+				/*
+				 * Check to see if all WAL replayed during online backup
+				 * (i.e., since last restartpoint used as backup starting
+				 * checkpoint) contain full-page writes.
+				 */
+				recptr = GetlastFpwDisableRecPtr();
+
+				if (!checkpointfpw || state->startpoint <= recptr)
+					ereport(ERROR,
+							(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+							 errmsg("WAL generated with full_page_writes=off was replayed "
+									"since last restartpoint"),
+							 errhint("This means that the backup being taken on the standby "
+									 "is corrupt and should not be used. "
+									 "Enable full_page_writes and run CHECKPOINT on the primary, "
+									 "and then try an online backup again.")));
+
+				/*
+				 * During recovery, since we don't use the end-of-backup WAL
+				 * record and don't write the backup history file, the
+				 * starting WAL location doesn't need to be unique. This means
+				 * that two base backups started at the same time might use
+				 * the same checkpoint as starting locations.
+				 */
+				gotUniqueStartpoint = true;
+			}
+
+			/*
+			 * If two base backups are started at the same time (in WAL sender
+			 * processes), we need to make sure that they use different
+			 * checkpoints as starting locations, because we use the starting
+			 * WAL location as a unique identifier for the base backup in the
+			 * end-of-backup WAL record and when we write the backup history
+			 * file. Perhaps it would be better to generate a separate unique ID
+			 * for each backup instead of forcing another checkpoint, but
+			 * taking a checkpoint right after another is not that expensive
+			 * either because only few buffers have been dirtied yet.
+			 */
+			WALInsertLockAcquireExclusive();
+			if (GetlastBackupStart(false) < state->startpoint)
+			{
+				SetlastBackupStart(false, state->startpoint);
+				gotUniqueStartpoint = true;
+			}
+			WALInsertLockRelease();
+		} while (!gotUniqueStartpoint);
+
+		/*
+		 * Construct tablespace_map file.
+		 */
+		datadirpathlen = strlen(DataDir);
+
+		/* Collect information about all tablespaces */
+		tblspcdir = AllocateDir("pg_tblspc");
+		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
+		{
+			char		fullpath[MAXPGPATH + 10];
+			char		linkpath[MAXPGPATH];
+			char	   *relpath = NULL;
+			int			rllen;
+			StringInfoData escapedpath;
+			char	   *s;
+
+			/* Skip anything that doesn't look like a tablespace */
+			if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
+				continue;
+
+			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
+
+			/*
+			 * Skip anything that isn't a symlink/junction.  For testing only,
+			 * we sometimes use allow_in_place_tablespaces to create
+			 * directories directly under pg_tblspc, which would fail below.
+			 */
+			if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK)
+				continue;
+
+			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
+			if (rllen < 0)
+			{
+				ereport(WARNING,
+						(errmsg("could not read symbolic link \"%s\": %m",
+								fullpath)));
+				continue;
+			}
+			else if (rllen >= sizeof(linkpath))
+			{
+				ereport(WARNING,
+						(errmsg("symbolic link \"%s\" target is too long",
+								fullpath)));
+				continue;
+			}
+			linkpath[rllen] = '\0';
+
+			/*
+			 * Build a backslash-escaped version of the link path to include
+			 * in the tablespace map file.
+			 */
+			initStringInfo(&escapedpath);
+			for (s = linkpath; *s; s++)
+			{
+				if (*s == '\n' || *s == '\r' || *s == '\\')
+					appendStringInfoChar(&escapedpath, '\\');
+				appendStringInfoChar(&escapedpath, *s);
+			}
+
+			/*
+			 * Relpath holds the relative path of the tablespace directory
+			 * when it's located within PGDATA, or NULL if it's located
+			 * elsewhere.
+			 */
+			if (rllen > datadirpathlen &&
+				strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
+				IS_DIR_SEP(linkpath[datadirpathlen]))
+				relpath = linkpath + datadirpathlen + 1;
+
+			ti = palloc(sizeof(tablespaceinfo));
+			ti->oid = pstrdup(de->d_name);
+			ti->path = pstrdup(linkpath);
+			ti->rpath = relpath ? pstrdup(relpath) : NULL;
+			ti->size = -1;
+
+			if (tablespaces)
+				*tablespaces = lappend(*tablespaces, ti);
+
+			appendStringInfo(tblspcmapfile, "%s %s\n",
+							 ti->oid, escapedpath.data);
+
+			pfree(escapedpath.data);
+		}
+		FreeDir(tblspcdir);
+
+		state->starttime = (pg_time_t) time(NULL);
+	}
+	PG_END_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0);
+
+	state->started_in_recovery = backup_started_in_recovery;
+
+	/*
+	 * Mark that the start phase has correctly finished for the backup.
+	 */
+	sessionBackupState = SESSION_BACKUP_RUNNING;
+}
+
+/* Error cleanup callback for pg_backup_start */
+static void
+pg_backup_start_callback(int code, Datum arg)
+{
+	int 	runningBackups;
+
+	/* Update backup counters and forcePageWrites on failure */
+	WALInsertLockAcquireExclusive();
+	runningBackups = GetrunningBackups(false);
+	Assert(runningBackups > 0);
+	SetrunningBackups(false, --runningBackups);
+	runningBackups = GetrunningBackups(false);
+
+	if (runningBackups == 0)
+		SetforcePageWrites(false, false);
+
+	WALInsertLockRelease();
+}
+
+/*
+ * Utility routine to fetch the session-level status of a running backup.
+ */
+SessionBackupState
+get_backup_status(void)
+{
+	return sessionBackupState;
+}
+
+/*
+ * do_pg_backup_stop
+ *
+ * Utility function called at the end of an online backup.  It creates history
+ * file (if required), resets sessionBackupState and so on.  It can optionally
+ * wait for WAL segments to be archived.
+ *
+ * "state" is filled with the information necessary to restore from this
+ * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc.
+ *
+ * It is the responsibility of the caller of this function to verify the
+ * permissions of the calling user!
+ */
+void
+do_pg_backup_stop(BackupState *state, bool waitforarchive)
+{
+	bool		backup_stopped_in_recovery = false;
+	char		histfilepath[MAXPGPATH];
+	char		lastxlogfilename[MAXFNAMELEN];
+	char		histfilename[MAXFNAMELEN];
+	XLogSegNo	_logSegNo;
+	FILE	   *fp;
+	int			seconds_before_warning;
+	int			waits = 0;
+	bool		reported_waiting = false;
+	int 		runningBackups;
+	ControlFileData	*ControlFile = GetControlFile();
+
+	Assert(state != NULL);
+
+	backup_stopped_in_recovery = RecoveryInProgress();
+
+	/*
+	 * During recovery, we don't need to check WAL level. Because, if WAL
+	 * level is not sufficient, it's impossible to get here during recovery.
+	 */
+	if (!backup_stopped_in_recovery && !XLogIsNeeded())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("WAL level not sufficient for making an online backup"),
+				 errhint("wal_level must be set to \"replica\" or \"logical\" at server start.")));
+
+	/*
+	 * OK to update backup counters, forcePageWrites, and session-level lock.
+	 *
+	 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them.
+	 * Otherwise they can be updated inconsistently, which might cause
+	 * do_pg_abort_backup() to fail.
+	 */
+	WALInsertLockAcquireExclusive();
+
+	/*
+	 * It is expected that each do_pg_backup_start() call is matched by
+	 * exactly one do_pg_backup_stop() call.
+	 */
+	runningBackups = GetrunningBackups(false);
+	Assert(runningBackups > 0);
+	SetrunningBackups(false, --runningBackups);
+	runningBackups = GetrunningBackups(false);
+
+	if (runningBackups == 0)
+		SetforcePageWrites(false, false);
+
+	/*
+	 * Clean up session-level lock.
+	 *
+	 * You might think that WALInsertLockRelease() can be called before
+	 * cleaning up session-level lock because session-level lock doesn't need
+	 * to be protected with WAL insertion lock. But since
+	 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
+	 * cleaned up before it.
+	 */
+	sessionBackupState = SESSION_BACKUP_NONE;
+
+	WALInsertLockRelease();
+
+	/*
+	 * If we are taking an online backup from the standby, we confirm that the
+	 * standby has not been promoted during the backup.
+	 */
+	if (state->started_in_recovery && !backup_stopped_in_recovery)
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("the standby was promoted during online backup"),
+				 errhint("This means that the backup being taken is corrupt "
+						 "and should not be used. "
+						 "Try taking another online backup.")));
+
+	/*
+	 * During recovery, we don't write an end-of-backup record. We assume that
+	 * pg_control was backed up last and its minimum recovery point can be
+	 * available as the backup end location. Since we don't have an
+	 * end-of-backup record, we use the pg_control value to check whether
+	 * we've reached the end of backup when starting recovery from this
+	 * backup. We have no way of checking if pg_control wasn't backed up last
+	 * however.
+	 *
+	 * We don't force a switch to new WAL file but it is still possible to
+	 * wait for all the required files to be archived if waitforarchive is
+	 * true. This is okay if we use the backup to start a standby and fetch
+	 * the missing WAL using streaming replication. But in the case of an
+	 * archive recovery, a user should set waitforarchive to true and wait for
+	 * them to be archived to ensure that all the required files are
+	 * available.
+	 *
+	 * We return the current minimum recovery point as the backup end
+	 * location. Note that it can be greater than the exact backup end
+	 * location if the minimum recovery point is updated after the backup of
+	 * pg_control. This is harmless for current uses.
+	 *
+	 * XXX currently a backup history file is for informational and debug
+	 * purposes only. It's not essential for an online backup. Furthermore,
+	 * even if it's created, it will not be archived during recovery because
+	 * an archiver is not invoked. So it doesn't seem worthwhile to write a
+	 * backup history file during recovery.
+	 */
+	if (backup_stopped_in_recovery)
+	{
+		XLogRecPtr	recptr;
+
+		/*
+		 * Check to see if all WAL replayed during online backup contain
+		 * full-page writes.
+		 */
+		recptr = GetlastFpwDisableRecPtr();
+
+		if (state->startpoint <= recptr)
+			ereport(ERROR,
+					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+					 errmsg("WAL generated with full_page_writes=off was replayed "
+							"during online backup"),
+					 errhint("This means that the backup being taken on the standby "
+							 "is corrupt and should not be used. "
+							 "Enable full_page_writes and run CHECKPOINT on the primary, "
+							 "and then try an online backup again.")));
+
+
+		LWLockAcquire(ControlFileLock, LW_SHARED);
+		state->stoppoint = ControlFile->minRecoveryPoint;
+		state->stoptli = ControlFile->minRecoveryPointTLI;
+		LWLockRelease(ControlFileLock);
+	}
+	else
+	{
+		char	   *history_file;
+
+		/*
+		 * Write the backup-end xlog record
+		 */
+		XLogBeginInsert();
+		XLogRegisterData((char *) (&state->startpoint),
+						 sizeof(state->startpoint));
+		state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
+
+		/*
+		 * Given that we're not in recovery, InsertTimeLineID is set and can't
+		 * change, so we can read it without a lock.
+		 */
+		state->stoptli = GetWALInsertionTimeLine();
+
+		/*
+		 * Force a switch to a new xlog segment file, so that the backup is
+		 * valid as soon as archiver moves out the current segment file.
+		 */
+		RequestXLogSwitch(false);
+
+		XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
+		state->stoptime = (pg_time_t) time(NULL);
+
+		/*
+		 * Write the backup history file
+		 */
+		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
+		BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
+							  state->startpoint, wal_segment_size);
+		fp = AllocateFile(histfilepath, "w");
+		if (!fp)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create file \"%s\": %m",
+							histfilepath)));
+
+		/* Build and save the contents of the backup history file */
+		history_file = build_backup_content(state, true);
+		fprintf(fp, "%s", history_file);
+		pfree(history_file);
+
+		if (fflush(fp) || ferror(fp) || FreeFile(fp))
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not write file \"%s\": %m",
+							histfilepath)));
+
+		/*
+		 * Clean out any no-longer-needed history files.  As a side effect,
+		 * this will post a .ready file for the newly created history file,
+		 * notifying the archiver that history file may be archived
+		 * immediately.
+		 */
+		CleanupBackupHistory();
+	}
+
+	/*
+	 * If archiving is enabled, wait for all the required WAL files to be
+	 * archived before returning. If archiving isn't enabled, the required WAL
+	 * needs to be transported via streaming replication (hopefully with
+	 * wal_keep_size set high enough), or some more exotic mechanism like
+	 * polling and copying files from pg_wal with script. We have no knowledge
+	 * of those mechanisms, so it's up to the user to ensure that he gets all
+	 * the required WAL.
+	 *
+	 * We wait until both the last WAL file filled during backup and the
+	 * history file have been archived, and assume that the alphabetic sorting
+	 * property of the WAL files ensures any earlier WAL files are safely
+	 * archived as well.
+	 *
+	 * We wait forever, since archive_command is supposed to work and we
+	 * assume the admin wanted his backup to work completely. If you don't
+	 * wish to wait, then either waitforarchive should be passed in as false,
+	 * or you can set statement_timeout.  Also, some notices are issued to
+	 * clue in anyone who might be doing this interactively.
+	 */
+
+	if (waitforarchive &&
+		((!backup_stopped_in_recovery && XLogArchivingActive()) ||
+		 (backup_stopped_in_recovery && XLogArchivingAlways())))
+	{
+		XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
+		XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
+					 wal_segment_size);
+
+		XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
+		BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
+							  state->startpoint, wal_segment_size);
+
+		seconds_before_warning = 60;
+		waits = 0;
+
+		while (XLogArchiveIsBusy(lastxlogfilename) ||
+			   XLogArchiveIsBusy(histfilename))
+		{
+			CHECK_FOR_INTERRUPTS();
+
+			if (!reported_waiting && waits > 5)
+			{
+				ereport(NOTICE,
+						(errmsg("base backup done, waiting for required WAL segments to be archived")));
+				reported_waiting = true;
+			}
+
+			(void) WaitLatch(MyLatch,
+							 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+							 1000L,
+							 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
+			ResetLatch(MyLatch);
+
+			if (++waits >= seconds_before_warning)
+			{
+				seconds_before_warning *= 2;	/* This wraps in >10 years... */
+				ereport(WARNING,
+						(errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
+								waits),
+						 errhint("Check that your archive_command is executing properly.  "
+								 "You can safely cancel this backup, "
+								 "but the database backup will not be usable without all the WAL segments.")));
+			}
+		}
+
+		ereport(NOTICE,
+				(errmsg("all required WAL segments have been archived")));
+	}
+	else if (waitforarchive)
+		ereport(NOTICE,
+				(errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
+}
+
+/*
+ * do_pg_abort_backup: abort a running backup
+ *
+ * This does just the most basic steps of do_pg_backup_stop(), by taking the
+ * system out of backup mode, thus making it a lot more safe to call from
+ * an error handler.
+ *
+ * The caller can pass 'arg' as 'true' or 'false' to control whether a warning
+ * is emitted.
+ *
+ * NB: This gets used as a before_shmem_exit handler, hence the odd-looking
+ * signature.
+ */
+void
+do_pg_abort_backup(int code, Datum arg)
+{
+	bool		emit_warning = DatumGetBool(arg);
+	int 		runningBackups;
+
+	/*
+	 * Quick exit if session does not have a running backup.
+	 */
+	if (sessionBackupState != SESSION_BACKUP_RUNNING)
+		return;
+
+	WALInsertLockAcquireExclusive();
+	runningBackups = GetrunningBackups(false);
+	Assert(runningBackups > 0);
+	SetrunningBackups(false, --runningBackups);
+	runningBackups = GetrunningBackups(false);
+
+	if (runningBackups == 0)
+		SetforcePageWrites(false, false);
+
+	sessionBackupState = SESSION_BACKUP_NONE;
+	WALInsertLockRelease();
+
+	if (emit_warning)
+		ereport(WARNING,
+				(errmsg("aborting backup due to backend exiting before pg_backup_stop was called")));
+}
+
+/*
+ * Register do_pg_abort_backup() as a before_shmem_exit handler, once per
+ * process; a true Datum makes it warn about a backup left unterminated.
+ */
+void
+register_persistent_abort_backup_handler(void)
+{
+	static bool already_done = false;
+
+	if (already_done)
+		return;
+	before_shmem_exit(do_pg_abort_backup, BoolGetDatum(true));
+	already_done = true;
+}
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index cb07694aea..cd36d95b56 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -36,6 +36,7 @@
 #include "access/xact.h"
 #include "access/xlog_internal.h"
 #include "access/xlogarchive.h"
+#include "access/xlogbackup.h"
 #include "access/xlogprefetcher.h"
 #include "access/xlogreader.h"
 #include "access/xlogrecovery.h"
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 8109209eae..297de05b49 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -275,42 +275,9 @@ extern XLogRecPtr GetlastFpwDisableRecPtr(void);
 extern XLogRecPtr GetlastBackupStart(bool need_lock);
 extern void SetlastBackupStart(bool need_lock, XLogRecPtr recptr);
 
-/*
- * Routines to start, stop, and get status of a base backup.
- */
-
-/*
- * Session-level status of base backups
- *
- * This is used in parallel with the shared memory status to control parallel
- * execution of base backup functions for a given session, be it a backend
- * dedicated to replication or a normal backend connected to a database. The
- * update of the session-level status happens at the same time as the shared
- * memory counters to keep a consistent global and local state of the backups
- * running.
- */
-typedef enum SessionBackupState
-{
-	SESSION_BACKUP_NONE,
-	SESSION_BACKUP_RUNNING,
-} SessionBackupState;
-
-extern void do_pg_backup_start(const char *backupidstr, bool fast,
-							   List **tablespaces, BackupState *state,
-							   StringInfo tblspcmapfile);
-extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
-extern void do_pg_abort_backup(int code, Datum arg);
-extern void register_persistent_abort_backup_handler(void);
-extern SessionBackupState get_backup_status(void);
-
 /* File path names (all relative to $PGDATA) */
 #define RECOVERY_SIGNAL_FILE	"recovery.signal"
 #define STANDBY_SIGNAL_FILE		"standby.signal"
-#define BACKUP_LABEL_FILE		"backup_label"
-#define BACKUP_LABEL_OLD		"backup_label.old"
-
-#define TABLESPACE_MAP			"tablespace_map"
-#define TABLESPACE_MAP_OLD		"tablespace_map.old"
 
 /* files to signal promotion to primary */
 #define PROMOTE_SIGNAL_FILE		"promote"
diff --git a/src/include/access/xlogbackup.h b/src/include/access/xlogbackup.h
index 8ec3d88b0a..4f77cc2fd0 100644
--- a/src/include/access/xlogbackup.h
+++ b/src/include/access/xlogbackup.h
@@ -15,8 +15,15 @@
 #define XLOG_BACKUP_H
 
 #include "access/xlogdefs.h"
+#include "nodes/pg_list.h"
 #include "pgtime.h"
 
+/* File path names (all relative to $PGDATA) */
+#define BACKUP_LABEL_FILE		"backup_label"
+#define BACKUP_LABEL_OLD		"backup_label.old"
+#define TABLESPACE_MAP			"tablespace_map"
+#define TABLESPACE_MAP_OLD		"tablespace_map.old"
+
 /* Structure to hold backup state. */
 typedef struct BackupState
 {
@@ -35,7 +42,34 @@ typedef struct BackupState
 	pg_time_t	stoptime;		/* backup stop time */
 } BackupState;
 
+/*
+ * Session-level status of base backups
+ *
+ * This is used in parallel with the shared memory status to control parallel
+ * execution of base backup functions for a given session, be it a backend
+ * dedicated to replication or a normal backend connected to a database. The
+ * update of the session-level status happens at the same time as the shared
+ * memory counters to keep a consistent global and local state of the backups
+ * running.
+ */
+typedef enum SessionBackupState
+{
+	SESSION_BACKUP_NONE,
+	SESSION_BACKUP_RUNNING,
+} SessionBackupState;
+
 extern char *build_backup_content(BackupState *state,
 								  bool ishistoryfile);
 
+/*
+ * Routines to start, stop, and get status of a base backup.
+ */
+extern void do_pg_backup_start(const char *backupidstr, bool fast,
+							   List **tablespaces, BackupState *state,
+							   StringInfo tblspcmapfile);
+extern void do_pg_backup_stop(BackupState *state, bool waitforarchive);
+extern void do_pg_abort_backup(int code, Datum arg);
+extern void register_persistent_abort_backup_handler(void);
+extern SessionBackupState get_backup_status(void);
+
 #endif							/* XLOG_BACKUP_H */
-- 
2.34.1

Reply via email to