On Wed, Oct 5, 2022 at 1:20 PM Michael Paquier <mich...@paquier.xyz> wrote: > > On Tue, Oct 04, 2022 at 03:54:20PM -0700, Nathan Bossart wrote: > > I would suggest moving this to a separate prerequisite patch that can be > > reviewed independently from the patches that simply move code to a > > different file.
I added the new functions in the 0001 patch for ease of review. > And FWIW, the SQL interfaces for pg_backup_start() and > pg_backup_stop() could stay in xlogfuncs.c. This has the advantage to > centralize in the same file all the SQL-function-specific checks. Agreed. +extern void WALInsertLockAcquire(void); +extern void WALInsertLockAcquireExclusive(void); +extern void WALInsertLockRelease(void); +extern void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); Note that I have moved all of the WAL insert lock related functions to xlog.h even though xlogbackup.c uses only 2 of them. This is done to keep all of these functions together. Please review the attached v2 patch set. -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From 7a0359813790c5c3872b5308ff562371e6467266 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 5 Oct 2022 08:59:45 +0000 Subject: [PATCH v2] Add functions for xlogbackup.c to call back into xlog.c --- src/backend/access/transam/xlog.c | 119 ++++++++++++++++++++++++++++-- src/include/access/xlog.h | 17 +++++ 2 files changed, 128 insertions(+), 8 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 27085b15a8..604220b474 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -697,10 +697,6 @@ static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos); static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos); static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr); -static void WALInsertLockAcquire(void); -static void WALInsertLockAcquireExclusive(void); -static void WALInsertLockRelease(void); -static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); /* * Insert an XLOG record represented by an already-constructed chain of data @@ -1305,7 +1301,7 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, /* * Acquire a WAL insertion lock, for inserting to WAL. */ -static void +void WALInsertLockAcquire(void) { bool immed; @@ -1350,7 +1346,7 @@ WALInsertLockAcquire(void) * Acquire all WAL insertion locks, to prevent other backends from inserting * to WAL. */ -static void +void WALInsertLockAcquireExclusive(void) { int i; @@ -1379,7 +1375,7 @@ WALInsertLockAcquireExclusive(void) * NB: Reset all variables to 0, so they cause LWLockWaitForVar to block the * next time the lock is acquired. */ -static void +void WALInsertLockRelease(void) { if (holdingAllLocks) @@ -1405,7 +1401,7 @@ WALInsertLockRelease(void) * Update our insertingAt value, to let others know that we've finished * inserting up to that point. 
*/ -static void +void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt) { if (holdingAllLocks) @@ -8983,3 +8979,110 @@ SetWalWriterSleeping(bool sleeping) XLogCtl->WalWriterSleeping = sleeping; SpinLockRelease(&XLogCtl->info_lck); } + +/* + * Get the ControlFile. + */ + ControlFileData * + GetControlFile(void) + { + return ControlFile; + } + +/* + * Set the forcePageWrites flag. + */ +void +SetforcePageWrites(bool need_lock, bool value) +{ + if (need_lock) + WALInsertLockAcquireExclusive(); + + XLogCtl->Insert.forcePageWrites = value; + + if (need_lock) + WALInsertLockRelease(); +} + +/* + * Set the runningBackups value. + */ +void +SetrunningBackups(bool need_lock, int value) +{ + if (need_lock) + WALInsertLockAcquireExclusive(); + + XLogCtl->Insert.runningBackups = value; + + if (need_lock) + WALInsertLockRelease(); +} + +/* + * Get the runningBackups value. + */ +int +GetrunningBackups(bool need_lock) +{ + int value; + + if (need_lock) + WALInsertLockAcquireExclusive(); + + value = XLogCtl->Insert.runningBackups; + + if (need_lock) + WALInsertLockRelease(); + + return value; +} + +/* + * Get the lastFpwDisableRecPtr. + */ +XLogRecPtr +GetlastFpwDisableRecPtr(void) +{ + XLogRecPtr recptr; + + SpinLockAcquire(&XLogCtl->info_lck); + recptr = XLogCtl->lastFpwDisableRecPtr; + SpinLockRelease(&XLogCtl->info_lck); + + return recptr; +} + +/* + * Get the lastBackupStart. + */ +XLogRecPtr +GetlastBackupStart(bool need_lock) +{ + XLogRecPtr recptr; + + if (need_lock) + WALInsertLockAcquireExclusive(); + + recptr = XLogCtl->Insert.lastBackupStart; + + if (need_lock) + WALInsertLockRelease(); + + return recptr; +} + +/* + * Set the lastBackupStart.
+ */ +void +SetlastBackupStart(bool need_lock, XLogRecPtr recptr) +{ + if (need_lock) + WALInsertLockAcquireExclusive(); + + XLogCtl->Insert.lastBackupStart = recptr; + + if (need_lock) + WALInsertLockRelease(); +} diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index dce265098e..8109209eae 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -14,6 +14,7 @@ #include "access/xlogbackup.h" #include "access/xlogdefs.h" #include "access/xlogreader.h" +#include "catalog/pg_control.h" #include "datatype/timestamp.h" #include "lib/stringinfo.h" #include "nodes/pg_list.h" @@ -258,6 +259,22 @@ extern void SetInstallXLogFileSegmentActive(void); extern bool IsInstallXLogFileSegmentActive(void); extern void XLogShutdownWalRcv(void); +extern void WALInsertLockAcquire(void); +extern void WALInsertLockAcquireExclusive(void); +extern void WALInsertLockRelease(void); +extern void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); + +/* + * Routines used by xlogbackup.c to call back into xlog.c during backup. + */ +extern ControlFileData *GetControlFile(void); +extern void SetforcePageWrites(bool need_lock, bool value); +extern void SetrunningBackups(bool need_lock, int value); +extern int GetrunningBackups(bool need_lock); +extern XLogRecPtr GetlastFpwDisableRecPtr(void); +extern XLogRecPtr GetlastBackupStart(bool need_lock); +extern void SetlastBackupStart(bool need_lock, XLogRecPtr recptr); + /* * Routines to start, stop, and get status of a base backup. */ -- 2.34.1
From 67d17054c4a1f436f7a03d3dab9e9a9c33ff4200 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 5 Oct 2022 09:07:18 +0000 Subject: [PATCH v2] Move backup-related code from xlog.c to xlogbackup.c --- src/backend/access/transam/xlog.c | 706 +-------------------- src/backend/access/transam/xlogbackup.c | 719 ++++++++++++++++++++++ src/backend/access/transam/xlogrecovery.c | 1 + src/include/access/xlog.h | 33 - src/include/access/xlogbackup.h | 34 + 5 files changed, 756 insertions(+), 737 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 604220b474..84175111b8 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -58,19 +58,19 @@ #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogarchive.h" +#include "access/xlogbackup.h" #include "access/xloginsert.h" #include "access/xlogprefetcher.h" #include "access/xlogreader.h" #include "access/xlogrecovery.h" #include "access/xlogutils.h" -#include "backup/basebackup.h" #include "catalog/catversion.h" -#include "catalog/pg_control.h" #include "catalog/pg_database.h" #include "common/controldata_utils.h" #include "common/file_utils.h" #include "executor/instrument.h" #include "miscadmin.h" +#include "nodes/pg_list.h" #include "pg_trace.h" #include "pgstat.h" #include "port/atomics.h" @@ -86,7 +86,6 @@ #include "replication/walsender.h" #include "storage/bufmgr.h" #include "storage/fd.h" -#include "storage/ipc.h" #include "storage/large_object.h" #include "storage/latch.h" #include "storage/pmsignal.h" @@ -393,12 +392,6 @@ typedef union WALInsertLockPadded char pad[PG_CACHE_LINE_SIZE]; } WALInsertLockPadded; -/* - * Session status of running backup, used for sanity checks in SQL-callable - * functions to start and stop backups. - */ -static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE; - /* * Shared state data for WAL insertion. 
*/ @@ -670,7 +663,6 @@ static void RemoveXlogFile(const struct dirent *segment_de, TimeLineID insertTLI); static void UpdateLastRemovedPtr(char *filename); static void ValidateXLOGDirectoryStructure(void); -static void CleanupBackupHistory(void); static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force); static bool PerformRecoveryXLogAction(void); static void InitControlFile(uint64 sysidentifier); @@ -679,8 +671,6 @@ static void ReadControlFile(void); static void UpdateControlFile(void); static char *str_time(pg_time_t tnow); -static void pg_backup_start_callback(int code, Datum arg); - static int get_sync_bit(int method); static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, @@ -3817,38 +3807,6 @@ ValidateXLOGDirectoryStructure(void) } } -/* - * Remove previous backup history files. This also retries creation of - * .ready files for any backup history files for which XLogArchiveNotify - * failed earlier. - */ -static void -CleanupBackupHistory(void) -{ - DIR *xldir; - struct dirent *xlde; - char path[MAXPGPATH + sizeof(XLOGDIR)]; - - xldir = AllocateDir(XLOGDIR); - - while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) - { - if (IsBackupHistoryFileName(xlde->d_name)) - { - if (XLogArchiveCheckDone(xlde->d_name)) - { - elog(DEBUG2, "removing WAL backup history file \"%s\"", - xlde->d_name); - snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name); - unlink(path); - XLogArchiveCleanup(xlde->d_name); - } - } - } - - FreeDir(xldir); -} - /* * I/O routines for pg_control * @@ -8235,666 +8193,6 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) PendingWalStats.wal_sync++; } -/* - * do_pg_backup_start is the workhorse of the user-visible pg_backup_start() - * function. It creates the necessary starting checkpoint and constructs the - * backup state and tablespace map. 
- * - * Input parameters are "state" (the backup state), "fast" (if true, we do - * the checkpoint in immediate mode to make it faster), and "tablespaces" - * (if non-NULL, indicates a list of tablespaceinfo structs describing the - * cluster's tablespaces.). - * - * The tablespace map contents are appended to passed-in parameter - * tablespace_map and the caller is responsible for including it in the backup - * archive as 'tablespace_map'. The tablespace_map file is required mainly for - * tar format in windows as native windows utilities are not able to create - * symlinks while extracting files from tar. However for consistency and - * platform-independence, we do it the same way everywhere. - * - * It fills in "state" with the information required for the backup, such - * as the minimum WAL location that must be present to restore from this - * backup (starttli) and the corresponding timeline ID (starttli). - * - * Every successfully started backup must be stopped by calling - * do_pg_backup_stop() or do_pg_abort_backup(). There can be many - * backups active at the same time. - * - * It is the responsibility of the caller of this function to verify the - * permissions of the calling user! - */ -void -do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, - BackupState *state, StringInfo tblspcmapfile) -{ - bool backup_started_in_recovery = false; - - Assert(state != NULL); - backup_started_in_recovery = RecoveryInProgress(); - - /* - * During recovery, we don't need to check WAL level. Because, if WAL - * level is not sufficient, it's impossible to get here during recovery. 
- */ - if (!backup_started_in_recovery && !XLogIsNeeded()) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL level not sufficient for making an online backup"), - errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); - - if (strlen(backupidstr) > MAXPGPATH) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("backup label too long (max %d bytes)", - MAXPGPATH))); - - memcpy(state->name, backupidstr, strlen(backupidstr)); - - /* - * Mark backup active in shared memory. We must do full-page WAL writes - * during an on-line backup even if not doing so at other times, because - * it's quite possible for the backup dump to obtain a "torn" (partially - * written) copy of a database page if it reads the page concurrently with - * our write to the same page. This can be fixed as long as the first - * write to the page in the WAL sequence is a full-page write. Hence, we - * turn on forcePageWrites and then force a CHECKPOINT, to ensure there - * are no dirty pages in shared memory that might get dumped while the - * backup is in progress without having a corresponding WAL record. (Once - * the backup is complete, we need not force full-page writes anymore, - * since we expect that any pages not modified during the backup interval - * must have been correctly captured by the backup.) - * - * Note that forcePageWrites has no effect during an online backup from - * the standby. - * - * We must hold all the insertion locks to change the value of - * forcePageWrites, to ensure adequate interlocking against - * XLogInsertRecord(). 
- */ - WALInsertLockAcquireExclusive(); - XLogCtl->Insert.runningBackups++; - XLogCtl->Insert.forcePageWrites = true; - WALInsertLockRelease(); - - /* Ensure we release forcePageWrites if fail below */ - PG_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); - { - bool gotUniqueStartpoint = false; - DIR *tblspcdir; - struct dirent *de; - tablespaceinfo *ti; - int datadirpathlen; - - /* - * Force an XLOG file switch before the checkpoint, to ensure that the - * WAL segment the checkpoint is written to doesn't contain pages with - * old timeline IDs. That would otherwise happen if you called - * pg_backup_start() right after restoring from a PITR archive: the - * first WAL segment containing the startup checkpoint has pages in - * the beginning with the old timeline ID. That can cause trouble at - * recovery: we won't have a history file covering the old timeline if - * pg_wal directory was not included in the base backup and the WAL - * archive was cleared too before starting the backup. - * - * This also ensures that we have emitted a WAL page header that has - * XLP_BKP_REMOVABLE off before we emit the checkpoint record. - * Therefore, if a WAL archiver (such as pglesslog) is trying to - * compress out removable backup blocks, it won't remove any that - * occur after this point. - * - * During recovery, we skip forcing XLOG file switch, which means that - * the backup taken during recovery is not available for the special - * recovery case described above. - */ - if (!backup_started_in_recovery) - RequestXLogSwitch(false); - - do - { - bool checkpointfpw; - - /* - * Force a CHECKPOINT. Aside from being necessary to prevent torn - * page problems, this guarantees that two successive backup runs - * will have different checkpoint positions and hence different - * history file names, even if nothing happened in between. - * - * During recovery, establish a restartpoint if possible. We use - * the last restartpoint as the backup starting checkpoint. 
This - * means that two successive backup runs can have same checkpoint - * positions. - * - * Since the fact that we are executing do_pg_backup_start() - * during recovery means that checkpointer is running, we can use - * RequestCheckpoint() to establish a restartpoint. - * - * We use CHECKPOINT_IMMEDIATE only if requested by user (via - * passing fast = true). Otherwise this can take awhile. - */ - RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | - (fast ? CHECKPOINT_IMMEDIATE : 0)); - - /* - * Now we need to fetch the checkpoint record location, and also - * its REDO pointer. The oldest point in WAL that would be needed - * to restore starting from the checkpoint is precisely the REDO - * pointer. - */ - LWLockAcquire(ControlFileLock, LW_SHARED); - state->checkpointloc = ControlFile->checkPoint; - state->startpoint = ControlFile->checkPointCopy.redo; - state->starttli = ControlFile->checkPointCopy.ThisTimeLineID; - checkpointfpw = ControlFile->checkPointCopy.fullPageWrites; - LWLockRelease(ControlFileLock); - - if (backup_started_in_recovery) - { - XLogRecPtr recptr; - - /* - * Check to see if all WAL replayed during online backup - * (i.e., since last restartpoint used as backup starting - * checkpoint) contain full-page writes. - */ - SpinLockAcquire(&XLogCtl->info_lck); - recptr = XLogCtl->lastFpwDisableRecPtr; - SpinLockRelease(&XLogCtl->info_lck); - - if (!checkpointfpw || state->startpoint <= recptr) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL generated with full_page_writes=off was replayed " - "since last restartpoint"), - errhint("This means that the backup being taken on the standby " - "is corrupt and should not be used. " - "Enable full_page_writes and run CHECKPOINT on the primary, " - "and then try an online backup again."))); - - /* - * During recovery, since we don't use the end-of-backup WAL - * record and don't write the backup history file, the - * starting WAL location doesn't need to be unique. 
This means - * that two base backups started at the same time might use - * the same checkpoint as starting locations. - */ - gotUniqueStartpoint = true; - } - - /* - * If two base backups are started at the same time (in WAL sender - * processes), we need to make sure that they use different - * checkpoints as starting locations, because we use the starting - * WAL location as a unique identifier for the base backup in the - * end-of-backup WAL record and when we write the backup history - * file. Perhaps it would be better generate a separate unique ID - * for each backup instead of forcing another checkpoint, but - * taking a checkpoint right after another is not that expensive - * either because only few buffers have been dirtied yet. - */ - WALInsertLockAcquireExclusive(); - if (XLogCtl->Insert.lastBackupStart < state->startpoint) - { - XLogCtl->Insert.lastBackupStart = state->startpoint; - gotUniqueStartpoint = true; - } - WALInsertLockRelease(); - } while (!gotUniqueStartpoint); - - /* - * Construct tablespace_map file. - */ - datadirpathlen = strlen(DataDir); - - /* Collect information about all tablespaces */ - tblspcdir = AllocateDir("pg_tblspc"); - while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) - { - char fullpath[MAXPGPATH + 10]; - char linkpath[MAXPGPATH]; - char *relpath = NULL; - int rllen; - StringInfoData escapedpath; - char *s; - - /* Skip anything that doesn't look like a tablespace */ - if (strspn(de->d_name, "0123456789") != strlen(de->d_name)) - continue; - - snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); - - /* - * Skip anything that isn't a symlink/junction. For testing only, - * we sometimes use allow_in_place_tablespaces to create - * directories directly under pg_tblspc, which would fail below. 
- */ - if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK) - continue; - - rllen = readlink(fullpath, linkpath, sizeof(linkpath)); - if (rllen < 0) - { - ereport(WARNING, - (errmsg("could not read symbolic link \"%s\": %m", - fullpath))); - continue; - } - else if (rllen >= sizeof(linkpath)) - { - ereport(WARNING, - (errmsg("symbolic link \"%s\" target is too long", - fullpath))); - continue; - } - linkpath[rllen] = '\0'; - - /* - * Build a backslash-escaped version of the link path to include - * in the tablespace map file. - */ - initStringInfo(&escapedpath); - for (s = linkpath; *s; s++) - { - if (*s == '\n' || *s == '\r' || *s == '\\') - appendStringInfoChar(&escapedpath, '\\'); - appendStringInfoChar(&escapedpath, *s); - } - - /* - * Relpath holds the relative path of the tablespace directory - * when it's located within PGDATA, or NULL if it's located - * elsewhere. - */ - if (rllen > datadirpathlen && - strncmp(linkpath, DataDir, datadirpathlen) == 0 && - IS_DIR_SEP(linkpath[datadirpathlen])) - relpath = linkpath + datadirpathlen + 1; - - ti = palloc(sizeof(tablespaceinfo)); - ti->oid = pstrdup(de->d_name); - ti->path = pstrdup(linkpath); - ti->rpath = relpath ? pstrdup(relpath) : NULL; - ti->size = -1; - - if (tablespaces) - *tablespaces = lappend(*tablespaces, ti); - - appendStringInfo(tblspcmapfile, "%s %s\n", - ti->oid, escapedpath.data); - - pfree(escapedpath.data); - } - FreeDir(tblspcdir); - - state->starttime = (pg_time_t) time(NULL); - } - PG_END_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); - - state->started_in_recovery = backup_started_in_recovery; - - /* - * Mark that the start phase has correctly finished for the backup. 
- */ - sessionBackupState = SESSION_BACKUP_RUNNING; -} - -/* Error cleanup callback for pg_backup_start */ -static void -pg_backup_start_callback(int code, Datum arg) -{ - /* Update backup counters and forcePageWrites on failure */ - WALInsertLockAcquireExclusive(); - - Assert(XLogCtl->Insert.runningBackups > 0); - XLogCtl->Insert.runningBackups--; - - if (XLogCtl->Insert.runningBackups == 0) - { - XLogCtl->Insert.forcePageWrites = false; - } - WALInsertLockRelease(); -} - -/* - * Utility routine to fetch the session-level status of a backup running. - */ -SessionBackupState -get_backup_status(void) -{ - return sessionBackupState; -} - -/* - * do_pg_backup_stop - * - * Utility function called at the end of an online backup. It creates history - * file (if required), resets sessionBackupState and so on. It can optionally - * wait for WAL segments to be archived. - * - * "state" is filled with the information necessary to restore from this - * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc. - * - * It is the responsibility of the caller of this function to verify the - * permissions of the calling user! - */ -void -do_pg_backup_stop(BackupState *state, bool waitforarchive) -{ - bool backup_stopped_in_recovery = false; - char histfilepath[MAXPGPATH]; - char lastxlogfilename[MAXFNAMELEN]; - char histfilename[MAXFNAMELEN]; - XLogSegNo _logSegNo; - FILE *fp; - int seconds_before_warning; - int waits = 0; - bool reported_waiting = false; - - Assert(state != NULL); - - backup_stopped_in_recovery = RecoveryInProgress(); - - /* - * During recovery, we don't need to check WAL level. Because, if WAL - * level is not sufficient, it's impossible to get here during recovery. 
- */ - if (!backup_stopped_in_recovery && !XLogIsNeeded()) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL level not sufficient for making an online backup"), - errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); - - /* - * OK to update backup counters, forcePageWrites, and session-level lock. - * - * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them. - * Otherwise they can be updated inconsistently, and which might cause - * do_pg_abort_backup() to fail. - */ - WALInsertLockAcquireExclusive(); - - /* - * It is expected that each do_pg_backup_start() call is matched by - * exactly one do_pg_backup_stop() call. - */ - Assert(XLogCtl->Insert.runningBackups > 0); - XLogCtl->Insert.runningBackups--; - - if (XLogCtl->Insert.runningBackups == 0) - { - XLogCtl->Insert.forcePageWrites = false; - } - - /* - * Clean up session-level lock. - * - * You might think that WALInsertLockRelease() can be called before - * cleaning up session-level lock because session-level lock doesn't need - * to be protected with WAL insertion lock. But since - * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be - * cleaned up before it. - */ - sessionBackupState = SESSION_BACKUP_NONE; - - WALInsertLockRelease(); - - /* - * If we are taking an online backup from the standby, we confirm that the - * standby has not been promoted during the backup. - */ - if (state->started_in_recovery && !backup_stopped_in_recovery) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("the standby was promoted during online backup"), - errhint("This means that the backup being taken is corrupt " - "and should not be used. " - "Try taking another online backup."))); - - /* - * During recovery, we don't write an end-of-backup record. We assume that - * pg_control was backed up last and its minimum recovery point can be - * available as the backup end location. 
Since we don't have an - * end-of-backup record, we use the pg_control value to check whether - * we've reached the end of backup when starting recovery from this - * backup. We have no way of checking if pg_control wasn't backed up last - * however. - * - * We don't force a switch to new WAL file but it is still possible to - * wait for all the required files to be archived if waitforarchive is - * true. This is okay if we use the backup to start a standby and fetch - * the missing WAL using streaming replication. But in the case of an - * archive recovery, a user should set waitforarchive to true and wait for - * them to be archived to ensure that all the required files are - * available. - * - * We return the current minimum recovery point as the backup end - * location. Note that it can be greater than the exact backup end - * location if the minimum recovery point is updated after the backup of - * pg_control. This is harmless for current uses. - * - * XXX currently a backup history file is for informational and debug - * purposes only. It's not essential for an online backup. Furthermore, - * even if it's created, it will not be archived during recovery because - * an archiver is not invoked. So it doesn't seem worthwhile to write a - * backup history file during recovery. - */ - if (backup_stopped_in_recovery) - { - XLogRecPtr recptr; - - /* - * Check to see if all WAL replayed during online backup contain - * full-page writes. - */ - SpinLockAcquire(&XLogCtl->info_lck); - recptr = XLogCtl->lastFpwDisableRecPtr; - SpinLockRelease(&XLogCtl->info_lck); - - if (state->startpoint <= recptr) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL generated with full_page_writes=off was replayed " - "during online backup"), - errhint("This means that the backup being taken on the standby " - "is corrupt and should not be used. 
" - "Enable full_page_writes and run CHECKPOINT on the primary, " - "and then try an online backup again."))); - - - LWLockAcquire(ControlFileLock, LW_SHARED); - state->stoppoint = ControlFile->minRecoveryPoint; - state->stoptli = ControlFile->minRecoveryPointTLI; - LWLockRelease(ControlFileLock); - } - else - { - char *history_file; - - /* - * Write the backup-end xlog record - */ - XLogBeginInsert(); - XLogRegisterData((char *) (&state->startpoint), - sizeof(state->startpoint)); - state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END); - - /* - * Given that we're not in recovery, InsertTimeLineID is set and can't - * change, so we can read it without a lock. - */ - state->stoptli = XLogCtl->InsertTimeLineID; - - /* - * Force a switch to a new xlog segment file, so that the backup is - * valid as soon as archiver moves out the current segment file. - */ - RequestXLogSwitch(false); - - state->stoptime = (pg_time_t) time(NULL); - - /* - * Write the backup history file - */ - XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); - BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo, - state->startpoint, wal_segment_size); - fp = AllocateFile(histfilepath, "w"); - if (!fp) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create file \"%s\": %m", - histfilepath))); - - /* Build and save the contents of the backup history file */ - history_file = build_backup_content(state, true); - fprintf(fp, "%s", history_file); - pfree(history_file); - - if (fflush(fp) || ferror(fp) || FreeFile(fp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - histfilepath))); - - /* - * Clean out any no-longer-needed history files. As a side effect, - * this will post a .ready file for the newly created history file, - * notifying the archiver that history file may be archived - * immediately. 
- */ - CleanupBackupHistory(); - } - - /* - * If archiving is enabled, wait for all the required WAL files to be - * archived before returning. If archiving isn't enabled, the required WAL - * needs to be transported via streaming replication (hopefully with - * wal_keep_size set high enough), or some more exotic mechanism like - * polling and copying files from pg_wal with script. We have no knowledge - * of those mechanisms, so it's up to the user to ensure that he gets all - * the required WAL. - * - * We wait until both the last WAL file filled during backup and the - * history file have been archived, and assume that the alphabetic sorting - * property of the WAL files ensures any earlier WAL files are safely - * archived as well. - * - * We wait forever, since archive_command is supposed to work and we - * assume the admin wanted his backup to work completely. If you don't - * wish to wait, then either waitforarchive should be passed in as false, - * or you can set statement_timeout. Also, some notices are issued to - * clue in anyone who might be doing this interactively. 
- */ - - if (waitforarchive && - ((!backup_stopped_in_recovery && XLogArchivingActive()) || - (backup_stopped_in_recovery && XLogArchivingAlways()))) - { - XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); - XLogFileName(lastxlogfilename, state->stoptli, _logSegNo, - wal_segment_size); - - XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); - BackupHistoryFileName(histfilename, state->stoptli, _logSegNo, - state->startpoint, wal_segment_size); - - seconds_before_warning = 60; - waits = 0; - - while (XLogArchiveIsBusy(lastxlogfilename) || - XLogArchiveIsBusy(histfilename)) - { - CHECK_FOR_INTERRUPTS(); - - if (!reported_waiting && waits > 5) - { - ereport(NOTICE, - (errmsg("base backup done, waiting for required WAL segments to be archived"))); - reported_waiting = true; - } - - (void) WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - 1000L, - WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE); - ResetLatch(MyLatch); - - if (++waits >= seconds_before_warning) - { - seconds_before_warning *= 2; /* This wraps in >10 years... */ - ereport(WARNING, - (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)", - waits), - errhint("Check that your archive_command is executing properly. " - "You can safely cancel this backup, " - "but the database backup will not be usable without all the WAL segments."))); - } - } - - ereport(NOTICE, - (errmsg("all required WAL segments have been archived"))); - } - else if (waitforarchive) - ereport(NOTICE, - (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup"))); -} - - -/* - * do_pg_abort_backup: abort a running backup - * - * This does just the most basic steps of do_pg_backup_stop(), by taking the - * system out of backup mode, thus making it a lot more safe to call from - * an error handler. 
- * - * The caller can pass 'arg' as 'true' or 'false' to control whether a warning - * is emitted. - * - * NB: This gets used as a before_shmem_exit handler, hence the odd-looking - * signature. - */ -void -do_pg_abort_backup(int code, Datum arg) -{ - bool emit_warning = DatumGetBool(arg); - - /* - * Quick exit if session does not have a running backup. - */ - if (sessionBackupState != SESSION_BACKUP_RUNNING) - return; - - WALInsertLockAcquireExclusive(); - Assert(XLogCtl->Insert.runningBackups > 0); - XLogCtl->Insert.runningBackups--; - - if (XLogCtl->Insert.runningBackups == 0) - { - XLogCtl->Insert.forcePageWrites = false; - } - - sessionBackupState = SESSION_BACKUP_NONE; - WALInsertLockRelease(); - - if (emit_warning) - ereport(WARNING, - (errmsg("aborting backup due to backend exiting before pg_backup_stop was called"))); -} - -/* - * Register a handler that will warn about unterminated backups at end of - * session, unless this has already been done. - */ -void -register_persistent_abort_backup_handler(void) -{ - static bool already_done = false; - - if (already_done) - return; - before_shmem_exit(do_pg_abort_backup, DatumGetBool(true)); - already_done = true; -} - /* * Get latest WAL insert pointer */ diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c index 90b5273b02..62cea966a5 100644 --- a/src/backend/access/transam/xlogbackup.c +++ b/src/backend/access/transam/xlogbackup.c @@ -13,9 +13,32 @@ #include "postgres.h" +#include <time.h> +#include <unistd.h> + #include "access/xlog.h" #include "access/xlog_internal.h" +#include "access/xlogarchive.h" #include "access/xlogbackup.h" +#include "access/xloginsert.h" +#include "backup/basebackup.h" +#include "common/file_utils.h" +#include "miscadmin.h" +#include "postmaster/bgwriter.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/latch.h" +#include "storage/lwlock.h" +#include "utils/wait_event.h" + +/* + * Session status of running backup, used 
for sanity checks in SQL-callable + * functions to start and stop backups. + */ +static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE; + +static void pg_backup_start_callback(int code, Datum arg); +static void CleanupBackupHistory(void); /* * Build contents for backup_label or backup history file. @@ -82,3 +105,699 @@ build_backup_content(BackupState *state, bool ishistoryfile) return data; } + +/* + * Remove previous backup history files. This also retries creation of + * .ready files for any backup history files for which XLogArchiveNotify + * failed earlier. + */ +static void +CleanupBackupHistory(void) +{ + DIR *xldir; + struct dirent *xlde; + char path[MAXPGPATH + sizeof(XLOGDIR)]; + + xldir = AllocateDir(XLOGDIR); + + while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) + { + if (IsBackupHistoryFileName(xlde->d_name)) + { + if (XLogArchiveCheckDone(xlde->d_name)) + { + elog(DEBUG2, "removing WAL backup history file \"%s\"", + xlde->d_name); + snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name); + unlink(path); + XLogArchiveCleanup(xlde->d_name); + } + } + } + + FreeDir(xldir); +} + +/* + * do_pg_backup_start is the workhorse of the user-visible pg_backup_start() + * function. It creates the necessary starting checkpoint and constructs the + * backup state and tablespace map. + * + * Input parameters are "state" (the backup state), "fast" (if true, we do + * the checkpoint in immediate mode to make it faster), and "tablespaces" + * (if non-NULL, indicates a list of tablespaceinfo structs describing the + * cluster's tablespaces). + * + * The tablespace map contents are appended to passed-in parameter + * tblspcmapfile and the caller is responsible for including it in the backup + * archive as 'tablespace_map'. The tablespace_map file is required mainly for + * tar format on Windows as native Windows utilities are not able to create + * symlinks while extracting files from tar. 
However for consistency and + * platform-independence, we do it the same way everywhere. + * + * It fills in "state" with the information required for the backup, such + * as the minimum WAL location that must be present to restore from this + * backup (startpoint) and the corresponding timeline ID (starttli). + * + * Every successfully started backup must be stopped by calling + * do_pg_backup_stop() or do_pg_abort_backup(). There can be many + * backups active at the same time. + * + * It is the responsibility of the caller of this function to verify the + * permissions of the calling user! + */ +void +do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, + BackupState *state, StringInfo tblspcmapfile) +{ + bool backup_started_in_recovery = false; + int runningBackups; + ControlFileData *ControlFile = GetControlFile(); + + Assert(state != NULL); + backup_started_in_recovery = RecoveryInProgress(); + + /* + * During recovery, we don't need to check WAL level. Because, if WAL + * level is not sufficient, it's impossible to get here during recovery. + */ + if (!backup_started_in_recovery && !XLogIsNeeded()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL level not sufficient for making an online backup"), + errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); + + if (strlen(backupidstr) > MAXPGPATH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("backup label too long (max %d bytes)", + MAXPGPATH))); + + memcpy(state->name, backupidstr, strlen(backupidstr)); + + /* + * Mark backup active in shared memory. We must do full-page WAL writes + * during an on-line backup even if not doing so at other times, because + * it's quite possible for the backup dump to obtain a "torn" (partially + * written) copy of a database page if it reads the page concurrently with + * our write to the same page. 
This can be fixed as long as the first + * write to the page in the WAL sequence is a full-page write. Hence, we + * turn on forcePageWrites and then force a CHECKPOINT, to ensure there + * are no dirty pages in shared memory that might get dumped while the + * backup is in progress without having a corresponding WAL record. (Once + * the backup is complete, we need not force full-page writes anymore, + * since we expect that any pages not modified during the backup interval + * must have been correctly captured by the backup.) + * + * Note that forcePageWrites has no effect during an online backup from + * the standby. + * + * We must hold all the insertion locks to change the value of + * forcePageWrites, to ensure adequate interlocking against + * XLogInsertRecord(). + */ + WALInsertLockAcquireExclusive(); + runningBackups = GetrunningBackups(false); + SetrunningBackups(false, ++runningBackups); + SetforcePageWrites(false, true); + WALInsertLockRelease(); + + /* Ensure we release forcePageWrites if fail below */ + PG_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); + { + bool gotUniqueStartpoint = false; + DIR *tblspcdir; + struct dirent *de; + tablespaceinfo *ti; + int datadirpathlen; + + /* + * Force an XLOG file switch before the checkpoint, to ensure that the + * WAL segment the checkpoint is written to doesn't contain pages with + * old timeline IDs. That would otherwise happen if you called + * pg_backup_start() right after restoring from a PITR archive: the + * first WAL segment containing the startup checkpoint has pages in + * the beginning with the old timeline ID. That can cause trouble at + * recovery: we won't have a history file covering the old timeline if + * pg_wal directory was not included in the base backup and the WAL + * archive was cleared too before starting the backup. + * + * This also ensures that we have emitted a WAL page header that has + * XLP_BKP_REMOVABLE off before we emit the checkpoint record. 
+ * Therefore, if a WAL archiver (such as pglesslog) is trying to + * compress out removable backup blocks, it won't remove any that + * occur after this point. + * + * During recovery, we skip forcing XLOG file switch, which means that + * the backup taken during recovery is not available for the special + * recovery case described above. + */ + if (!backup_started_in_recovery) + RequestXLogSwitch(false); + + do + { + bool checkpointfpw; + + /* + * Force a CHECKPOINT. Aside from being necessary to prevent torn + * page problems, this guarantees that two successive backup runs + * will have different checkpoint positions and hence different + * history file names, even if nothing happened in between. + * + * During recovery, establish a restartpoint if possible. We use + * the last restartpoint as the backup starting checkpoint. This + * means that two successive backup runs can have same checkpoint + * positions. + * + * Since the fact that we are executing do_pg_backup_start() + * during recovery means that checkpointer is running, we can use + * RequestCheckpoint() to establish a restartpoint. + * + * We use CHECKPOINT_IMMEDIATE only if requested by user (via + * passing fast = true). Otherwise this can take awhile. + */ + RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | + (fast ? CHECKPOINT_IMMEDIATE : 0)); + + /* + * Now we need to fetch the checkpoint record location, and also + * its REDO pointer. The oldest point in WAL that would be needed + * to restore starting from the checkpoint is precisely the REDO + * pointer. 
+ */ + LWLockAcquire(ControlFileLock, LW_SHARED); + state->checkpointloc = ControlFile->checkPoint; + state->startpoint = ControlFile->checkPointCopy.redo; + state->starttli = ControlFile->checkPointCopy.ThisTimeLineID; + checkpointfpw = ControlFile->checkPointCopy.fullPageWrites; + LWLockRelease(ControlFileLock); + + if (backup_started_in_recovery) + { + XLogRecPtr recptr; + + /* + * Check to see if all WAL replayed during online backup + * (i.e., since last restartpoint used as backup starting + * checkpoint) contain full-page writes. + */ + recptr = GetlastFpwDisableRecPtr(); + + if (!checkpointfpw || state->startpoint <= recptr) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL generated with full_page_writes=off was replayed " + "since last restartpoint"), + errhint("This means that the backup being taken on the standby " + "is corrupt and should not be used. " + "Enable full_page_writes and run CHECKPOINT on the primary, " + "and then try an online backup again."))); + + /* + * During recovery, since we don't use the end-of-backup WAL + * record and don't write the backup history file, the + * starting WAL location doesn't need to be unique. This means + * that two base backups started at the same time might use + * the same checkpoint as starting locations. + */ + gotUniqueStartpoint = true; + } + + /* + * If two base backups are started at the same time (in WAL sender + * processes), we need to make sure that they use different + * checkpoints as starting locations, because we use the starting + * WAL location as a unique identifier for the base backup in the + * end-of-backup WAL record and when we write the backup history + * file. Perhaps it would be better to generate a separate unique ID + * for each backup instead of forcing another checkpoint, but + * taking a checkpoint right after another is not that expensive + * either because only a few buffers have been dirtied yet. 
+ */ + WALInsertLockAcquireExclusive(); + if (GetlastBackupStart(false) < state->startpoint) + { + SetlastBackupStart(false, state->startpoint); + gotUniqueStartpoint = true; + } + WALInsertLockRelease(); + } while (!gotUniqueStartpoint); + + /* + * Construct tablespace_map file. + */ + datadirpathlen = strlen(DataDir); + + /* Collect information about all tablespaces */ + tblspcdir = AllocateDir("pg_tblspc"); + while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) + { + char fullpath[MAXPGPATH + 10]; + char linkpath[MAXPGPATH]; + char *relpath = NULL; + int rllen; + StringInfoData escapedpath; + char *s; + + /* Skip anything that doesn't look like a tablespace */ + if (strspn(de->d_name, "0123456789") != strlen(de->d_name)) + continue; + + snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); + + /* + * Skip anything that isn't a symlink/junction. For testing only, + * we sometimes use allow_in_place_tablespaces to create + * directories directly under pg_tblspc, which would fail below. + */ + if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK) + continue; + + rllen = readlink(fullpath, linkpath, sizeof(linkpath)); + if (rllen < 0) + { + ereport(WARNING, + (errmsg("could not read symbolic link \"%s\": %m", + fullpath))); + continue; + } + else if (rllen >= sizeof(linkpath)) + { + ereport(WARNING, + (errmsg("symbolic link \"%s\" target is too long", + fullpath))); + continue; + } + linkpath[rllen] = '\0'; + + /* + * Build a backslash-escaped version of the link path to include + * in the tablespace map file. + */ + initStringInfo(&escapedpath); + for (s = linkpath; *s; s++) + { + if (*s == '\n' || *s == '\r' || *s == '\\') + appendStringInfoChar(&escapedpath, '\\'); + appendStringInfoChar(&escapedpath, *s); + } + + /* + * Relpath holds the relative path of the tablespace directory + * when it's located within PGDATA, or NULL if it's located + * elsewhere. 
+ */ + if (rllen > datadirpathlen && + strncmp(linkpath, DataDir, datadirpathlen) == 0 && + IS_DIR_SEP(linkpath[datadirpathlen])) + relpath = linkpath + datadirpathlen + 1; + + ti = palloc(sizeof(tablespaceinfo)); + ti->oid = pstrdup(de->d_name); + ti->path = pstrdup(linkpath); + ti->rpath = relpath ? pstrdup(relpath) : NULL; + ti->size = -1; + + if (tablespaces) + *tablespaces = lappend(*tablespaces, ti); + + appendStringInfo(tblspcmapfile, "%s %s\n", + ti->oid, escapedpath.data); + + pfree(escapedpath.data); + } + FreeDir(tblspcdir); + + state->starttime = (pg_time_t) time(NULL); + } + PG_END_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); + + state->started_in_recovery = backup_started_in_recovery; + + /* + * Mark that the start phase has correctly finished for the backup. + */ + sessionBackupState = SESSION_BACKUP_RUNNING; +} + +/* Error cleanup callback for pg_backup_start */ +static void +pg_backup_start_callback(int code, Datum arg) +{ + int runningBackups; + + /* Update backup counters and forcePageWrites on failure */ + WALInsertLockAcquireExclusive(); + runningBackups = GetrunningBackups(false); + Assert(runningBackups > 0); + SetrunningBackups(false, --runningBackups); + runningBackups = GetrunningBackups(false); + + if (runningBackups == 0) + SetforcePageWrites(false, false); + + WALInsertLockRelease(); +} + +/* + * Utility routine to fetch the session-level status of a backup running. + */ +SessionBackupState +get_backup_status(void) +{ + return sessionBackupState; +} + +/* + * do_pg_backup_stop + * + * Utility function called at the end of an online backup. It creates history + * file (if required), resets sessionBackupState and so on. It can optionally + * wait for WAL segments to be archived. + * + * "state" is filled with the information necessary to restore from this + * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc. 
+ * + * It is the responsibility of the caller of this function to verify the + * permissions of the calling user! + */ +void +do_pg_backup_stop(BackupState *state, bool waitforarchive) +{ + bool backup_stopped_in_recovery = false; + char histfilepath[MAXPGPATH]; + char lastxlogfilename[MAXFNAMELEN]; + char histfilename[MAXFNAMELEN]; + XLogSegNo _logSegNo; + FILE *fp; + int seconds_before_warning; + int waits = 0; + bool reported_waiting = false; + int runningBackups; + ControlFileData *ControlFile = GetControlFile(); + + Assert(state != NULL); + + backup_stopped_in_recovery = RecoveryInProgress(); + + /* + * During recovery, we don't need to check WAL level. Because, if WAL + * level is not sufficient, it's impossible to get here during recovery. + */ + if (!backup_stopped_in_recovery && !XLogIsNeeded()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL level not sufficient for making an online backup"), + errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); + + /* + * OK to update backup counters, forcePageWrites, and session-level lock. + * + * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them. + * Otherwise they can be updated inconsistently, which might cause + * do_pg_abort_backup() to fail. + */ + WALInsertLockAcquireExclusive(); + + /* + * It is expected that each do_pg_backup_start() call is matched by + * exactly one do_pg_backup_stop() call. + */ + runningBackups = GetrunningBackups(false); + Assert(runningBackups > 0); + SetrunningBackups(false, --runningBackups); + runningBackups = GetrunningBackups(false); + + if (runningBackups == 0) + SetforcePageWrites(false, false); + + /* + * Clean up session-level lock. + * + * You might think that WALInsertLockRelease() can be called before + * cleaning up session-level lock because session-level lock doesn't need + * to be protected with WAL insertion lock. 
But since + * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be + * cleaned up before it. + */ + sessionBackupState = SESSION_BACKUP_NONE; + + WALInsertLockRelease(); + + /* + * If we are taking an online backup from the standby, we confirm that the + * standby has not been promoted during the backup. + */ + if (state->started_in_recovery && !backup_stopped_in_recovery) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("the standby was promoted during online backup"), + errhint("This means that the backup being taken is corrupt " + "and should not be used. " + "Try taking another online backup."))); + + /* + * During recovery, we don't write an end-of-backup record. We assume that + * pg_control was backed up last and its minimum recovery point can be + * available as the backup end location. Since we don't have an + * end-of-backup record, we use the pg_control value to check whether + * we've reached the end of backup when starting recovery from this + * backup. We have no way of checking if pg_control wasn't backed up last + * however. + * + * We don't force a switch to new WAL file but it is still possible to + * wait for all the required files to be archived if waitforarchive is + * true. This is okay if we use the backup to start a standby and fetch + * the missing WAL using streaming replication. But in the case of an + * archive recovery, a user should set waitforarchive to true and wait for + * them to be archived to ensure that all the required files are + * available. + * + * We return the current minimum recovery point as the backup end + * location. Note that it can be greater than the exact backup end + * location if the minimum recovery point is updated after the backup of + * pg_control. This is harmless for current uses. + * + * XXX currently a backup history file is for informational and debug + * purposes only. It's not essential for an online backup. 
Furthermore, + * even if it's created, it will not be archived during recovery because + * an archiver is not invoked. So it doesn't seem worthwhile to write a + * backup history file during recovery. + */ + if (backup_stopped_in_recovery) + { + XLogRecPtr recptr; + + /* + * Check to see if all WAL replayed during online backup contain + * full-page writes. + */ + recptr = GetlastFpwDisableRecPtr(); + + if (state->startpoint <= recptr) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL generated with full_page_writes=off was replayed " + "during online backup"), + errhint("This means that the backup being taken on the standby " + "is corrupt and should not be used. " + "Enable full_page_writes and run CHECKPOINT on the primary, " + "and then try an online backup again."))); + + + LWLockAcquire(ControlFileLock, LW_SHARED); + state->stoppoint = ControlFile->minRecoveryPoint; + state->stoptli = ControlFile->minRecoveryPointTLI; + LWLockRelease(ControlFileLock); + } + else + { + char *history_file; + + /* + * Write the backup-end xlog record + */ + XLogBeginInsert(); + XLogRegisterData((char *) (&state->startpoint), + sizeof(state->startpoint)); + state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END); + + /* + * Given that we're not in recovery, InsertTimeLineID is set and can't + * change, so we can read it without a lock. + */ + state->stoptli = GetWALInsertionTimeLine(); + + /* + * Force a switch to a new xlog segment file, so that the backup is + * valid as soon as archiver moves out the current segment file. 
+ */ + RequestXLogSwitch(false); + + XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); + state->stoptime = (pg_time_t) time(NULL); + + /* + * Write the backup history file + */ + XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); + BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo, + state->startpoint, wal_segment_size); + fp = AllocateFile(histfilepath, "w"); + if (!fp) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", + histfilepath))); + + /* Build and save the contents of the backup history file */ + history_file = build_backup_content(state, true); + fprintf(fp, "%s", history_file); + pfree(history_file); + + if (fflush(fp) || ferror(fp) || FreeFile(fp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + histfilepath))); + + /* + * Clean out any no-longer-needed history files. As a side effect, + * this will post a .ready file for the newly created history file, + * notifying the archiver that the history file may be archived + * immediately. + */ + CleanupBackupHistory(); + } + + /* + * If archiving is enabled, wait for all the required WAL files to be + * archived before returning. If archiving isn't enabled, the required WAL + * needs to be transported via streaming replication (hopefully with + * wal_keep_size set high enough), or some more exotic mechanism like + * polling and copying files from pg_wal with a script. We have no knowledge + * of those mechanisms, so it's up to the user to ensure that he gets all + * the required WAL. + * + * We wait until both the last WAL file filled during backup and the + * history file have been archived, and assume that the alphabetic sorting + * property of the WAL files ensures any earlier WAL files are safely + * archived as well. + * + * We wait forever, since archive_command is supposed to work and we + * assume the admin wanted his backup to work completely. 
If you don't + * wish to wait, then either waitforarchive should be passed in as false, + * or you can set statement_timeout. Also, some notices are issued to + * clue in anyone who might be doing this interactively. + */ + + if (waitforarchive && + ((!backup_stopped_in_recovery && XLogArchivingActive()) || + (backup_stopped_in_recovery && XLogArchivingAlways()))) + { + XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); + XLogFileName(lastxlogfilename, state->stoptli, _logSegNo, + wal_segment_size); + + XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); + BackupHistoryFileName(histfilename, state->stoptli, _logSegNo, + state->startpoint, wal_segment_size); + + seconds_before_warning = 60; + waits = 0; + + while (XLogArchiveIsBusy(lastxlogfilename) || + XLogArchiveIsBusy(histfilename)) + { + CHECK_FOR_INTERRUPTS(); + + if (!reported_waiting && waits > 5) + { + ereport(NOTICE, + (errmsg("base backup done, waiting for required WAL segments to be archived"))); + reported_waiting = true; + } + + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + 1000L, + WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE); + ResetLatch(MyLatch); + + if (++waits >= seconds_before_warning) + { + seconds_before_warning *= 2; /* This wraps in >10 years... */ + ereport(WARNING, + (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)", + waits), + errhint("Check that your archive_command is executing properly. 
" + "You can safely cancel this backup, " + "but the database backup will not be usable without all the WAL segments."))); + } + } + + ereport(NOTICE, + (errmsg("all required WAL segments have been archived"))); + } + else if (waitforarchive) + ereport(NOTICE, + (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup"))); +} + +/* + * do_pg_abort_backup: abort a running backup + * + * This does just the most basic steps of do_pg_backup_stop(), by taking the + * system out of backup mode, thus making it a lot more safe to call from + * an error handler. + * + * The caller can pass 'arg' as 'true' or 'false' to control whether a warning + * is emitted. + * + * NB: This gets used as a before_shmem_exit handler, hence the odd-looking + * signature. + */ +void +do_pg_abort_backup(int code, Datum arg) +{ + bool emit_warning = DatumGetBool(arg); + int runningBackups; + + /* + * Quick exit if session does not have a running backup. + */ + if (sessionBackupState != SESSION_BACKUP_RUNNING) + return; + + WALInsertLockAcquireExclusive(); + runningBackups = GetrunningBackups(false); + Assert(runningBackups > 0); + SetrunningBackups(false, --runningBackups); + runningBackups = GetrunningBackups(false); + + if (runningBackups == 0) + SetforcePageWrites(false, false); + + sessionBackupState = SESSION_BACKUP_NONE; + WALInsertLockRelease(); + + if (emit_warning) + ereport(WARNING, + (errmsg("aborting backup due to backend exiting before pg_backup_stop was called"))); +} + +/* + * Register a handler that will warn about unterminated backups at end of + * session, unless this has already been done. 
+ */ +void +register_persistent_abort_backup_handler(void) +{ + static bool already_done = false; + + if (already_done) + return; + before_shmem_exit(do_pg_abort_backup, BoolGetDatum(true)); + already_done = true; +} diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index cb07694aea..cd36d95b56 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -36,6 +36,7 @@ #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogarchive.h" +#include "access/xlogbackup.h" #include "access/xlogprefetcher.h" #include "access/xlogreader.h" #include "access/xlogrecovery.h" diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 8109209eae..297de05b49 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -275,42 +275,9 @@ extern XLogRecPtr GetlastFpwDisableRecPtr(void); extern XLogRecPtr GetlastBackupStart(bool need_lock); extern void SetlastBackupStart(bool need_lock, XLogRecPtr recptr); -/* - * Routines to start, stop, and get status of a base backup. - */ - -/* - * Session-level status of base backups - * - * This is used in parallel with the shared memory status to control parallel - * execution of base backup functions for a given session, be it a backend - * dedicated to replication or a normal backend connected to a database. The - * update of the session-level status happens at the same time as the shared - * memory counters to keep a consistent global and local state of the backups - * running. 
- */ -typedef enum SessionBackupState -{ - SESSION_BACKUP_NONE, - SESSION_BACKUP_RUNNING, -} SessionBackupState; - -extern void do_pg_backup_start(const char *backupidstr, bool fast, - List **tablespaces, BackupState *state, - StringInfo tblspcmapfile); -extern void do_pg_backup_stop(BackupState *state, bool waitforarchive); -extern void do_pg_abort_backup(int code, Datum arg); -extern void register_persistent_abort_backup_handler(void); -extern SessionBackupState get_backup_status(void); - /* File path names (all relative to $PGDATA) */ #define RECOVERY_SIGNAL_FILE "recovery.signal" #define STANDBY_SIGNAL_FILE "standby.signal" -#define BACKUP_LABEL_FILE "backup_label" -#define BACKUP_LABEL_OLD "backup_label.old" - -#define TABLESPACE_MAP "tablespace_map" -#define TABLESPACE_MAP_OLD "tablespace_map.old" /* files to signal promotion to primary */ #define PROMOTE_SIGNAL_FILE "promote" diff --git a/src/include/access/xlogbackup.h b/src/include/access/xlogbackup.h index 8ec3d88b0a..4f77cc2fd0 100644 --- a/src/include/access/xlogbackup.h +++ b/src/include/access/xlogbackup.h @@ -15,8 +15,15 @@ #define XLOG_BACKUP_H #include "access/xlogdefs.h" +#include "nodes/pg_list.h" #include "pgtime.h" +/* File path names (all relative to $PGDATA) */ +#define BACKUP_LABEL_FILE "backup_label" +#define BACKUP_LABEL_OLD "backup_label.old" +#define TABLESPACE_MAP "tablespace_map" +#define TABLESPACE_MAP_OLD "tablespace_map.old" + /* Structure to hold backup state. */ typedef struct BackupState { @@ -35,7 +42,34 @@ typedef struct BackupState pg_time_t stoptime; /* backup stop time */ } BackupState; +/* + * Session-level status of base backups + * + * This is used in parallel with the shared memory status to control parallel + * execution of base backup functions for a given session, be it a backend + * dedicated to replication or a normal backend connected to a database. 
The + * update of the session-level status happens at the same time as the shared + * memory counters to keep a consistent global and local state of the backups + * running. + */ +typedef enum SessionBackupState +{ + SESSION_BACKUP_NONE, + SESSION_BACKUP_RUNNING, +} SessionBackupState; + extern char *build_backup_content(BackupState *state, bool ishistoryfile); +/* + * Routines to start, stop, and get status of a base backup. + */ +extern void do_pg_backup_start(const char *backupidstr, bool fast, + List **tablespaces, BackupState *state, + StringInfo tblspcmapfile); +extern void do_pg_backup_stop(BackupState *state, bool waitforarchive); +extern void do_pg_abort_backup(int code, Datum arg); +extern void register_persistent_abort_backup_handler(void); +extern SessionBackupState get_backup_status(void); + #endif /* XLOG_BACKUP_H */ -- 2.34.1