Hi, xlog.c currently has ~9000 LOC, of which ~700 LOC are backup-related, making the file really unmanageable. Commit 7d708093b7400327658a30d1aa1d5e284d37622c added new files xlogbackup.c/.h to eventually host all backup-related code. I propose to move all the backup-related code from xlog.c and xlogfuncs.c to xlogbackup.c/.h. In doing so, I had to add a few Get/Set functions for XLogCtl variables so that xlogbackup.c can use them.
I'm attaching a patch set where 0001 and 0002 move backup code from xlogfuncs.c and xlog.c, respectively, to xlogbackup.c/.h. The advantage is that all of the core backup code ends up in a single file, making it more readable and manageable while reducing xlog.c's size. Thoughts? Thanks to Michael Paquier for suggesting new files for backup-related code. [1] https://www.postgresql.org/message-id/CALj2ACX0wjo%2B49hbUmvc_zT1zwdqFOQyhorN0Ox-Rk6v97Nejw%40mail.gmail.com -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From 3ccaf1ad08e00229106be0cca3180585e4eb9c6f Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 28 Sep 2022 12:12:59 +0000 Subject: [PATCH v1] Move backup-related code from xlogfuncs.c to xlogbackup.c --- src/backend/access/transam/xlogbackup.c | 132 ++++++++++++++++++++++++ src/backend/access/transam/xlogfuncs.c | 131 ----------------------- 2 files changed, 132 insertions(+), 131 deletions(-) diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c index 90b5273b02..073678d84f 100644 --- a/src/backend/access/transam/xlogbackup.c +++ b/src/backend/access/transam/xlogbackup.c @@ -16,6 +16,16 @@ #include "access/xlog.h" #include "access/xlog_internal.h" #include "access/xlogbackup.h" +#include "funcapi.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/pg_lsn.h" + +/* + * Backup-related variables. + */ +static BackupState *backup_state = NULL; +static StringInfo tablespace_map = NULL; /* * Build contents for backup_label or backup history file. @@ -82,3 +92,125 @@ build_backup_content(BackupState *state, bool ishistoryfile) return data; } + +/* + * pg_backup_start: set up for taking an on-line backup dump + * + * Essentially what this does is to create the contents required for the + * backup_label file and the tablespace map. + * + * Permission checking for this function is managed through the normal + * GRANT system. 
+ */ +Datum +pg_backup_start(PG_FUNCTION_ARGS) +{ + text *backupid = PG_GETARG_TEXT_PP(0); + bool fast = PG_GETARG_BOOL(1); + char *backupidstr; + SessionBackupState status = get_backup_status(); + MemoryContext oldcontext; + + backupidstr = text_to_cstring(backupid); + + if (status == SESSION_BACKUP_RUNNING) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is already in progress in this session"))); + + /* + * backup_state and tablespace_map need to be long-lived as they are used + * in pg_backup_stop(). + */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + /* Allocate backup state or reset it, if it comes from a previous run */ + if (backup_state == NULL) + backup_state = (BackupState *) palloc0(sizeof(BackupState)); + else + MemSet(backup_state, 0, sizeof(BackupState)); + + /* + * tablespace_map may have been created in a previous backup, so take this + * occasion to clean it. + */ + if (tablespace_map != NULL) + { + pfree(tablespace_map->data); + pfree(tablespace_map); + tablespace_map = NULL; + } + + tablespace_map = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + register_persistent_abort_backup_handler(); + do_pg_backup_start(backupidstr, fast, NULL, backup_state, tablespace_map); + + PG_RETURN_LSN(backup_state->startpoint); +} + +/* + * pg_backup_stop: finish taking an on-line backup. + * + * The first parameter (variable 'waitforarchive'), which is optional, + * allows the user to choose if they want to wait for the WAL to be archived + * or if we should just return as soon as the WAL record is written. + * + * This function stops an in-progress backup, creates backup_label contents and + * it returns the backup stop LSN, backup_label and tablespace_map contents. + * + * The backup_label contains the user-supplied label string (typically this + * would be used to tell where the backup dump will be stored), the starting + * time, starting WAL location for the dump and so on. 
It is the caller's + * responsibility to write the backup_label and tablespace_map files in the + * data folder that will be restored from this backup. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +Datum +pg_backup_stop(PG_FUNCTION_ARGS) +{ +#define PG_BACKUP_STOP_V2_COLS 3 + TupleDesc tupdesc; + Datum values[PG_BACKUP_STOP_V2_COLS] = {0}; + bool nulls[PG_BACKUP_STOP_V2_COLS] = {0}; + bool waitforarchive = PG_GETARG_BOOL(0); + char *backup_label; + SessionBackupState status = get_backup_status(); + + /* Initialize attributes information in the tuple descriptor */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (status != SESSION_BACKUP_RUNNING) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("backup is not in progress"), + errhint("Did you call pg_backup_start()?"))); + + Assert(backup_state != NULL); + Assert(tablespace_map != NULL); + + /* Stop the backup */ + do_pg_backup_stop(backup_state, waitforarchive); + + /* Build the contents of backup_label */ + backup_label = build_backup_content(backup_state, false); + + values[0] = LSNGetDatum(backup_state->stoppoint); + values[1] = CStringGetTextDatum(backup_label); + values[2] = CStringGetTextDatum(tablespace_map->data); + + /* Deallocate backup-related variables */ + pfree(backup_state); + backup_state = NULL; + pfree(tablespace_map->data); + pfree(tablespace_map); + tablespace_map = NULL; + pfree(backup_label); + + /* Returns the record as Datum */ + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index a801a94fe8..b415492722 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -18,9 +18,7 @@ #include <unistd.h> -#include "access/htup_details.h" #include "access/xlog_internal.h" 
-#include "access/xlogbackup.h" #include "access/xlogrecovery.h" #include "access/xlogutils.h" #include "catalog/pg_type.h" @@ -39,135 +37,6 @@ #include "utils/timestamp.h" #include "utils/tuplestore.h" -/* - * Backup-related variables. - */ -static BackupState *backup_state = NULL; -static StringInfo tablespace_map = NULL; - -/* - * pg_backup_start: set up for taking an on-line backup dump - * - * Essentially what this does is to create the contents required for the - * backup_label file and the tablespace map. - * - * Permission checking for this function is managed through the normal - * GRANT system. - */ -Datum -pg_backup_start(PG_FUNCTION_ARGS) -{ - text *backupid = PG_GETARG_TEXT_PP(0); - bool fast = PG_GETARG_BOOL(1); - char *backupidstr; - SessionBackupState status = get_backup_status(); - MemoryContext oldcontext; - - backupidstr = text_to_cstring(backupid); - - if (status == SESSION_BACKUP_RUNNING) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("a backup is already in progress in this session"))); - - /* - * backup_state and tablespace_map need to be long-lived as they are used - * in pg_backup_stop(). - */ - oldcontext = MemoryContextSwitchTo(TopMemoryContext); - - /* Allocate backup state or reset it, if it comes from a previous run */ - if (backup_state == NULL) - backup_state = (BackupState *) palloc0(sizeof(BackupState)); - else - MemSet(backup_state, 0, sizeof(BackupState)); - - /* - * tablespace_map may have been created in a previous backup, so take this - * occasion to clean it. - */ - if (tablespace_map != NULL) - { - pfree(tablespace_map->data); - pfree(tablespace_map); - tablespace_map = NULL; - } - - tablespace_map = makeStringInfo(); - MemoryContextSwitchTo(oldcontext); - - register_persistent_abort_backup_handler(); - do_pg_backup_start(backupidstr, fast, NULL, backup_state, tablespace_map); - - PG_RETURN_LSN(backup_state->startpoint); -} - - -/* - * pg_backup_stop: finish taking an on-line backup. 
- * - * The first parameter (variable 'waitforarchive'), which is optional, - * allows the user to choose if they want to wait for the WAL to be archived - * or if we should just return as soon as the WAL record is written. - * - * This function stops an in-progress backup, creates backup_label contents and - * it returns the backup stop LSN, backup_label and tablespace_map contents. - * - * The backup_label contains the user-supplied label string (typically this - * would be used to tell where the backup dump will be stored), the starting - * time, starting WAL location for the dump and so on. It is the caller's - * responsibility to write the backup_label and tablespace_map files in the - * data folder that will be restored from this backup. - * - * Permission checking for this function is managed through the normal - * GRANT system. - */ -Datum -pg_backup_stop(PG_FUNCTION_ARGS) -{ -#define PG_BACKUP_STOP_V2_COLS 3 - TupleDesc tupdesc; - Datum values[PG_BACKUP_STOP_V2_COLS] = {0}; - bool nulls[PG_BACKUP_STOP_V2_COLS] = {0}; - bool waitforarchive = PG_GETARG_BOOL(0); - char *backup_label; - SessionBackupState status = get_backup_status(); - - /* Initialize attributes information in the tuple descriptor */ - if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) - elog(ERROR, "return type must be a row type"); - - if (status != SESSION_BACKUP_RUNNING) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("backup is not in progress"), - errhint("Did you call pg_backup_start()?"))); - - Assert(backup_state != NULL); - Assert(tablespace_map != NULL); - - /* Stop the backup */ - do_pg_backup_stop(backup_state, waitforarchive); - - /* Build the contents of backup_label */ - backup_label = build_backup_content(backup_state, false); - - values[0] = LSNGetDatum(backup_state->stoppoint); - values[1] = CStringGetTextDatum(backup_label); - values[2] = CStringGetTextDatum(tablespace_map->data); - - /* Deallocate backup-related 
variables */ - pfree(backup_state); - backup_state = NULL; - pfree(tablespace_map->data); - pfree(tablespace_map); - tablespace_map = NULL; - pfree(backup_label); - - /* Returns the record as Datum */ - PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); -} - /* * pg_switch_wal: switch to next xlog file * -- 2.34.1
From 293619c49661c44b69b275833a2c40224c0831a8 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 28 Sep 2022 14:07:06 +0000 Subject: [PATCH v1] Move backup-related code from xlog.c to xlogbackup.c --- src/backend/access/transam/xlog.c | 827 +++------------------- src/backend/access/transam/xlogbackup.c | 719 +++++++++++++++++++ src/backend/access/transam/xlogrecovery.c | 1 + src/include/access/xlog.h | 44 +- src/include/access/xlogbackup.h | 34 + 5 files changed, 880 insertions(+), 745 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 00992a11b9..49d11572b9 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -58,19 +58,19 @@ #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogarchive.h" +#include "access/xlogbackup.h" #include "access/xloginsert.h" #include "access/xlogprefetcher.h" #include "access/xlogreader.h" #include "access/xlogrecovery.h" #include "access/xlogutils.h" -#include "backup/basebackup.h" #include "catalog/catversion.h" -#include "catalog/pg_control.h" #include "catalog/pg_database.h" #include "common/controldata_utils.h" #include "common/file_utils.h" #include "executor/instrument.h" #include "miscadmin.h" +#include "nodes/pg_list.h" #include "pg_trace.h" #include "pgstat.h" #include "port/atomics.h" @@ -86,7 +86,6 @@ #include "replication/walsender.h" #include "storage/bufmgr.h" #include "storage/fd.h" -#include "storage/ipc.h" #include "storage/large_object.h" #include "storage/latch.h" #include "storage/pmsignal.h" @@ -393,12 +392,6 @@ typedef union WALInsertLockPadded char pad[PG_CACHE_LINE_SIZE]; } WALInsertLockPadded; -/* - * Session status of running backup, used for sanity checks in SQL-callable - * functions to start and stop backups. - */ -static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE; - /* * Shared state data for WAL insertion. 
*/ @@ -670,7 +663,6 @@ static void RemoveXlogFile(const struct dirent *segment_de, TimeLineID insertTLI); static void UpdateLastRemovedPtr(char *filename); static void ValidateXLOGDirectoryStructure(void); -static void CleanupBackupHistory(void); static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force); static bool PerformRecoveryXLogAction(void); static void InitControlFile(uint64 sysidentifier); @@ -679,8 +671,6 @@ static void ReadControlFile(void); static void UpdateControlFile(void); static char *str_time(pg_time_t tnow); -static void pg_backup_start_callback(int code, Datum arg); - static int get_sync_bit(int method); static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, @@ -697,11 +687,6 @@ static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos); static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos); static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr); -static void WALInsertLockAcquire(void); -static void WALInsertLockAcquireExclusive(void); -static void WALInsertLockRelease(void); -static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); - /* * Insert an XLOG record represented by an already-constructed chain of data * chunks. This is a low-level routine; to construct the WAL record header @@ -1305,7 +1290,7 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, /* * Acquire a WAL insertion lock, for inserting to WAL. */ -static void +void WALInsertLockAcquire(void) { bool immed; @@ -1350,7 +1335,7 @@ WALInsertLockAcquire(void) * Acquire all WAL insertion locks, to prevent other backends from inserting * to WAL. */ -static void +void WALInsertLockAcquireExclusive(void) { int i; @@ -1379,7 +1364,7 @@ WALInsertLockAcquireExclusive(void) * NB: Reset all variables to 0, so they cause LWLockWaitForVar to block the * next time the lock is acquired. 
*/ -static void +void WALInsertLockRelease(void) { if (holdingAllLocks) @@ -1405,7 +1390,7 @@ WALInsertLockRelease(void) * Update our insertingAt value, to let others know that we've finished * inserting up to that point. */ -static void +void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt) { if (holdingAllLocks) @@ -3821,38 +3806,6 @@ ValidateXLOGDirectoryStructure(void) } } -/* - * Remove previous backup history files. This also retries creation of - * .ready files for any backup history files for which XLogArchiveNotify - * failed earlier. - */ -static void -CleanupBackupHistory(void) -{ - DIR *xldir; - struct dirent *xlde; - char path[MAXPGPATH + sizeof(XLOGDIR)]; - - xldir = AllocateDir(XLOGDIR); - - while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) - { - if (IsBackupHistoryFileName(xlde->d_name)) - { - if (XLogArchiveCheckDone(xlde->d_name)) - { - elog(DEBUG2, "removing WAL backup history file \"%s\"", - xlde->d_name); - snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name); - unlink(path); - XLogArchiveCleanup(xlde->d_name); - } - } - } - - FreeDir(xldir); -} - /* * I/O routines for pg_control * @@ -8239,667 +8192,6 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) PendingWalStats.wal_sync++; } -/* - * do_pg_backup_start is the workhorse of the user-visible pg_backup_start() - * function. It creates the necessary starting checkpoint and constructs the - * backup state and tablespace map. - * - * Input parameters are "state" (the backup state), "fast" (if true, we do - * the checkpoint in immediate mode to make it faster), and "tablespaces" - * (if non-NULL, indicates a list of tablespaceinfo structs describing the - * cluster's tablespaces.). - * - * The tablespace map contents are appended to passed-in parameter - * tablespace_map and the caller is responsible for including it in the backup - * archive as 'tablespace_map'. 
The tablespace_map file is required mainly for - * tar format in windows as native windows utilities are not able to create - * symlinks while extracting files from tar. However for consistency and - * platform-independence, we do it the same way everywhere. - * - * It fills in "state" with the information required for the backup, such - * as the minimum WAL location that must be present to restore from this - * backup (starttli) and the corresponding timeline ID (starttli). - * - * Every successfully started backup must be stopped by calling - * do_pg_backup_stop() or do_pg_abort_backup(). There can be many - * backups active at the same time. - * - * It is the responsibility of the caller of this function to verify the - * permissions of the calling user! - */ -void -do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, - BackupState *state, StringInfo tblspcmapfile) -{ - bool backup_started_in_recovery = false; - - Assert(state != NULL); - backup_started_in_recovery = RecoveryInProgress(); - - /* - * During recovery, we don't need to check WAL level. Because, if WAL - * level is not sufficient, it's impossible to get here during recovery. - */ - if (!backup_started_in_recovery && !XLogIsNeeded()) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL level not sufficient for making an online backup"), - errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); - - if (strlen(backupidstr) > MAXPGPATH) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("backup label too long (max %d bytes)", - MAXPGPATH))); - - memcpy(state->name, backupidstr, strlen(backupidstr)); - - /* - * Mark backup active in shared memory. 
We must do full-page WAL writes - * during an on-line backup even if not doing so at other times, because - * it's quite possible for the backup dump to obtain a "torn" (partially - * written) copy of a database page if it reads the page concurrently with - * our write to the same page. This can be fixed as long as the first - * write to the page in the WAL sequence is a full-page write. Hence, we - * turn on forcePageWrites and then force a CHECKPOINT, to ensure there - * are no dirty pages in shared memory that might get dumped while the - * backup is in progress without having a corresponding WAL record. (Once - * the backup is complete, we need not force full-page writes anymore, - * since we expect that any pages not modified during the backup interval - * must have been correctly captured by the backup.) - * - * Note that forcePageWrites has no effect during an online backup from - * the standby. - * - * We must hold all the insertion locks to change the value of - * forcePageWrites, to ensure adequate interlocking against - * XLogInsertRecord(). - */ - WALInsertLockAcquireExclusive(); - XLogCtl->Insert.runningBackups++; - XLogCtl->Insert.forcePageWrites = true; - WALInsertLockRelease(); - - /* Ensure we release forcePageWrites if fail below */ - PG_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); - { - bool gotUniqueStartpoint = false; - DIR *tblspcdir; - struct dirent *de; - tablespaceinfo *ti; - int datadirpathlen; - - /* - * Force an XLOG file switch before the checkpoint, to ensure that the - * WAL segment the checkpoint is written to doesn't contain pages with - * old timeline IDs. That would otherwise happen if you called - * pg_backup_start() right after restoring from a PITR archive: the - * first WAL segment containing the startup checkpoint has pages in - * the beginning with the old timeline ID. 
That can cause trouble at - * recovery: we won't have a history file covering the old timeline if - * pg_wal directory was not included in the base backup and the WAL - * archive was cleared too before starting the backup. - * - * This also ensures that we have emitted a WAL page header that has - * XLP_BKP_REMOVABLE off before we emit the checkpoint record. - * Therefore, if a WAL archiver (such as pglesslog) is trying to - * compress out removable backup blocks, it won't remove any that - * occur after this point. - * - * During recovery, we skip forcing XLOG file switch, which means that - * the backup taken during recovery is not available for the special - * recovery case described above. - */ - if (!backup_started_in_recovery) - RequestXLogSwitch(false); - - do - { - bool checkpointfpw; - - /* - * Force a CHECKPOINT. Aside from being necessary to prevent torn - * page problems, this guarantees that two successive backup runs - * will have different checkpoint positions and hence different - * history file names, even if nothing happened in between. - * - * During recovery, establish a restartpoint if possible. We use - * the last restartpoint as the backup starting checkpoint. This - * means that two successive backup runs can have same checkpoint - * positions. - * - * Since the fact that we are executing do_pg_backup_start() - * during recovery means that checkpointer is running, we can use - * RequestCheckpoint() to establish a restartpoint. - * - * We use CHECKPOINT_IMMEDIATE only if requested by user (via - * passing fast = true). Otherwise this can take awhile. - */ - RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | - (fast ? CHECKPOINT_IMMEDIATE : 0)); - - /* - * Now we need to fetch the checkpoint record location, and also - * its REDO pointer. The oldest point in WAL that would be needed - * to restore starting from the checkpoint is precisely the REDO - * pointer. 
- */ - LWLockAcquire(ControlFileLock, LW_SHARED); - state->checkpointloc = ControlFile->checkPoint; - state->startpoint = ControlFile->checkPointCopy.redo; - state->starttli = ControlFile->checkPointCopy.ThisTimeLineID; - checkpointfpw = ControlFile->checkPointCopy.fullPageWrites; - LWLockRelease(ControlFileLock); - - if (backup_started_in_recovery) - { - XLogRecPtr recptr; - - /* - * Check to see if all WAL replayed during online backup - * (i.e., since last restartpoint used as backup starting - * checkpoint) contain full-page writes. - */ - SpinLockAcquire(&XLogCtl->info_lck); - recptr = XLogCtl->lastFpwDisableRecPtr; - SpinLockRelease(&XLogCtl->info_lck); - - if (!checkpointfpw || state->startpoint <= recptr) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL generated with full_page_writes=off was replayed " - "since last restartpoint"), - errhint("This means that the backup being taken on the standby " - "is corrupt and should not be used. " - "Enable full_page_writes and run CHECKPOINT on the primary, " - "and then try an online backup again."))); - - /* - * During recovery, since we don't use the end-of-backup WAL - * record and don't write the backup history file, the - * starting WAL location doesn't need to be unique. This means - * that two base backups started at the same time might use - * the same checkpoint as starting locations. - */ - gotUniqueStartpoint = true; - } - - /* - * If two base backups are started at the same time (in WAL sender - * processes), we need to make sure that they use different - * checkpoints as starting locations, because we use the starting - * WAL location as a unique identifier for the base backup in the - * end-of-backup WAL record and when we write the backup history - * file. 
Perhaps it would be better generate a separate unique ID - * for each backup instead of forcing another checkpoint, but - * taking a checkpoint right after another is not that expensive - * either because only few buffers have been dirtied yet. - */ - WALInsertLockAcquireExclusive(); - if (XLogCtl->Insert.lastBackupStart < state->startpoint) - { - XLogCtl->Insert.lastBackupStart = state->startpoint; - gotUniqueStartpoint = true; - } - WALInsertLockRelease(); - } while (!gotUniqueStartpoint); - - /* - * Construct tablespace_map file. - */ - datadirpathlen = strlen(DataDir); - - /* Collect information about all tablespaces */ - tblspcdir = AllocateDir("pg_tblspc"); - while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) - { - char fullpath[MAXPGPATH + 10]; - char linkpath[MAXPGPATH]; - char *relpath = NULL; - int rllen; - StringInfoData escapedpath; - char *s; - - /* Skip anything that doesn't look like a tablespace */ - if (strspn(de->d_name, "0123456789") != strlen(de->d_name)) - continue; - - snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); - - /* - * Skip anything that isn't a symlink/junction. For testing only, - * we sometimes use allow_in_place_tablespaces to create - * directories directly under pg_tblspc, which would fail below. - */ - if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK) - continue; - - rllen = readlink(fullpath, linkpath, sizeof(linkpath)); - if (rllen < 0) - { - ereport(WARNING, - (errmsg("could not read symbolic link \"%s\": %m", - fullpath))); - continue; - } - else if (rllen >= sizeof(linkpath)) - { - ereport(WARNING, - (errmsg("symbolic link \"%s\" target is too long", - fullpath))); - continue; - } - linkpath[rllen] = '\0'; - - /* - * Build a backslash-escaped version of the link path to include - * in the tablespace map file. 
- */ - initStringInfo(&escapedpath); - for (s = linkpath; *s; s++) - { - if (*s == '\n' || *s == '\r' || *s == '\\') - appendStringInfoChar(&escapedpath, '\\'); - appendStringInfoChar(&escapedpath, *s); - } - - /* - * Relpath holds the relative path of the tablespace directory - * when it's located within PGDATA, or NULL if it's located - * elsewhere. - */ - if (rllen > datadirpathlen && - strncmp(linkpath, DataDir, datadirpathlen) == 0 && - IS_DIR_SEP(linkpath[datadirpathlen])) - relpath = linkpath + datadirpathlen + 1; - - ti = palloc(sizeof(tablespaceinfo)); - ti->oid = pstrdup(de->d_name); - ti->path = pstrdup(linkpath); - ti->rpath = relpath ? pstrdup(relpath) : NULL; - ti->size = -1; - - if (tablespaces) - *tablespaces = lappend(*tablespaces, ti); - - appendStringInfo(tblspcmapfile, "%s %s\n", - ti->oid, escapedpath.data); - - pfree(escapedpath.data); - } - FreeDir(tblspcdir); - - state->starttime = (pg_time_t) time(NULL); - } - PG_END_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); - - state->started_in_recovery = backup_started_in_recovery; - - /* - * Mark that the start phase has correctly finished for the backup. - */ - sessionBackupState = SESSION_BACKUP_RUNNING; -} - -/* Error cleanup callback for pg_backup_start */ -static void -pg_backup_start_callback(int code, Datum arg) -{ - /* Update backup counters and forcePageWrites on failure */ - WALInsertLockAcquireExclusive(); - - Assert(XLogCtl->Insert.runningBackups > 0); - XLogCtl->Insert.runningBackups--; - - if (XLogCtl->Insert.runningBackups == 0) - { - XLogCtl->Insert.forcePageWrites = false; - } - WALInsertLockRelease(); -} - -/* - * Utility routine to fetch the session-level status of a backup running. - */ -SessionBackupState -get_backup_status(void) -{ - return sessionBackupState; -} - -/* - * do_pg_backup_stop - * - * Utility function called at the end of an online backup. It creates history - * file (if required), resets sessionBackupState and so on. 
It can optionally - * wait for WAL segments to be archived. - * - * "state" is filled with the information necessary to restore from this - * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc. - * - * It is the responsibility of the caller of this function to verify the - * permissions of the calling user! - */ -void -do_pg_backup_stop(BackupState *state, bool waitforarchive) -{ - bool backup_stopped_in_recovery = false; - char histfilepath[MAXPGPATH]; - char lastxlogfilename[MAXFNAMELEN]; - char histfilename[MAXFNAMELEN]; - XLogSegNo _logSegNo; - FILE *fp; - int seconds_before_warning; - int waits = 0; - bool reported_waiting = false; - - Assert(state != NULL); - - backup_stopped_in_recovery = RecoveryInProgress(); - - /* - * During recovery, we don't need to check WAL level. Because, if WAL - * level is not sufficient, it's impossible to get here during recovery. - */ - if (!backup_stopped_in_recovery && !XLogIsNeeded()) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL level not sufficient for making an online backup"), - errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); - - /* - * OK to update backup counters, forcePageWrites, and session-level lock. - * - * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them. - * Otherwise they can be updated inconsistently, and which might cause - * do_pg_abort_backup() to fail. - */ - WALInsertLockAcquireExclusive(); - - /* - * It is expected that each do_pg_backup_start() call is matched by - * exactly one do_pg_backup_stop() call. - */ - Assert(XLogCtl->Insert.runningBackups > 0); - XLogCtl->Insert.runningBackups--; - - if (XLogCtl->Insert.runningBackups == 0) - { - XLogCtl->Insert.forcePageWrites = false; - } - - /* - * Clean up session-level lock. 
- * - * You might think that WALInsertLockRelease() can be called before - * cleaning up session-level lock because session-level lock doesn't need - * to be protected with WAL insertion lock. But since - * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be - * cleaned up before it. - */ - sessionBackupState = SESSION_BACKUP_NONE; - - WALInsertLockRelease(); - - /* - * If we are taking an online backup from the standby, we confirm that the - * standby has not been promoted during the backup. - */ - if (state->started_in_recovery && !backup_stopped_in_recovery) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("the standby was promoted during online backup"), - errhint("This means that the backup being taken is corrupt " - "and should not be used. " - "Try taking another online backup."))); - - /* - * During recovery, we don't write an end-of-backup record. We assume that - * pg_control was backed up last and its minimum recovery point can be - * available as the backup end location. Since we don't have an - * end-of-backup record, we use the pg_control value to check whether - * we've reached the end of backup when starting recovery from this - * backup. We have no way of checking if pg_control wasn't backed up last - * however. - * - * We don't force a switch to new WAL file but it is still possible to - * wait for all the required files to be archived if waitforarchive is - * true. This is okay if we use the backup to start a standby and fetch - * the missing WAL using streaming replication. But in the case of an - * archive recovery, a user should set waitforarchive to true and wait for - * them to be archived to ensure that all the required files are - * available. - * - * We return the current minimum recovery point as the backup end - * location. Note that it can be greater than the exact backup end - * location if the minimum recovery point is updated after the backup of - * pg_control. 
This is harmless for current uses. - * - * XXX currently a backup history file is for informational and debug - * purposes only. It's not essential for an online backup. Furthermore, - * even if it's created, it will not be archived during recovery because - * an archiver is not invoked. So it doesn't seem worthwhile to write a - * backup history file during recovery. - */ - if (backup_stopped_in_recovery) - { - XLogRecPtr recptr; - - /* - * Check to see if all WAL replayed during online backup contain - * full-page writes. - */ - SpinLockAcquire(&XLogCtl->info_lck); - recptr = XLogCtl->lastFpwDisableRecPtr; - SpinLockRelease(&XLogCtl->info_lck); - - if (state->startpoint <= recptr) - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL generated with full_page_writes=off was replayed " - "during online backup"), - errhint("This means that the backup being taken on the standby " - "is corrupt and should not be used. " - "Enable full_page_writes and run CHECKPOINT on the primary, " - "and then try an online backup again."))); - - - LWLockAcquire(ControlFileLock, LW_SHARED); - state->stoppoint = ControlFile->minRecoveryPoint; - state->stoptli = ControlFile->minRecoveryPointTLI; - LWLockRelease(ControlFileLock); - } - else - { - char *history_file; - - /* - * Write the backup-end xlog record - */ - XLogBeginInsert(); - XLogRegisterData((char *) (&state->startpoint), - sizeof(state->startpoint)); - state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END); - - /* - * Given that we're not in recovery, InsertTimeLineID is set and can't - * change, so we can read it without a lock. - */ - state->stoptli = XLogCtl->InsertTimeLineID; - - /* - * Force a switch to a new xlog segment file, so that the backup is - * valid as soon as archiver moves out the current segment file. 
- */ - RequestXLogSwitch(false); - - XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); - state->stoptime = (pg_time_t) time(NULL); - - /* - * Write the backup history file - */ - XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); - BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo, - state->startpoint, wal_segment_size); - fp = AllocateFile(histfilepath, "w"); - if (!fp) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create file \"%s\": %m", - histfilepath))); - - /* Build and save the contents of the backup history file */ - history_file = build_backup_content(state, true); - fprintf(fp, "%s", history_file); - pfree(history_file); - - if (fflush(fp) || ferror(fp) || FreeFile(fp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - histfilepath))); - - /* - * Clean out any no-longer-needed history files. As a side effect, - * this will post a .ready file for the newly created history file, - * notifying the archiver that history file may be archived - * immediately. - */ - CleanupBackupHistory(); - } - - /* - * If archiving is enabled, wait for all the required WAL files to be - * archived before returning. If archiving isn't enabled, the required WAL - * needs to be transported via streaming replication (hopefully with - * wal_keep_size set high enough), or some more exotic mechanism like - * polling and copying files from pg_wal with script. We have no knowledge - * of those mechanisms, so it's up to the user to ensure that he gets all - * the required WAL. - * - * We wait until both the last WAL file filled during backup and the - * history file have been archived, and assume that the alphabetic sorting - * property of the WAL files ensures any earlier WAL files are safely - * archived as well. - * - * We wait forever, since archive_command is supposed to work and we - * assume the admin wanted his backup to work completely. 
If you don't - * wish to wait, then either waitforarchive should be passed in as false, - * or you can set statement_timeout. Also, some notices are issued to - * clue in anyone who might be doing this interactively. - */ - - if (waitforarchive && - ((!backup_stopped_in_recovery && XLogArchivingActive()) || - (backup_stopped_in_recovery && XLogArchivingAlways()))) - { - XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); - XLogFileName(lastxlogfilename, state->stoptli, _logSegNo, - wal_segment_size); - - XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); - BackupHistoryFileName(histfilename, state->stoptli, _logSegNo, - state->startpoint, wal_segment_size); - - seconds_before_warning = 60; - waits = 0; - - while (XLogArchiveIsBusy(lastxlogfilename) || - XLogArchiveIsBusy(histfilename)) - { - CHECK_FOR_INTERRUPTS(); - - if (!reported_waiting && waits > 5) - { - ereport(NOTICE, - (errmsg("base backup done, waiting for required WAL segments to be archived"))); - reported_waiting = true; - } - - (void) WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, - 1000L, - WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE); - ResetLatch(MyLatch); - - if (++waits >= seconds_before_warning) - { - seconds_before_warning *= 2; /* This wraps in >10 years... */ - ereport(WARNING, - (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)", - waits), - errhint("Check that your archive_command is executing properly. 
" - "You can safely cancel this backup, " - "but the database backup will not be usable without all the WAL segments."))); - } - } - - ereport(NOTICE, - (errmsg("all required WAL segments have been archived"))); - } - else if (waitforarchive) - ereport(NOTICE, - (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup"))); -} - - -/* - * do_pg_abort_backup: abort a running backup - * - * This does just the most basic steps of do_pg_backup_stop(), by taking the - * system out of backup mode, thus making it a lot more safe to call from - * an error handler. - * - * The caller can pass 'arg' as 'true' or 'false' to control whether a warning - * is emitted. - * - * NB: This gets used as a before_shmem_exit handler, hence the odd-looking - * signature. - */ -void -do_pg_abort_backup(int code, Datum arg) -{ - bool emit_warning = DatumGetBool(arg); - - /* - * Quick exit if session does not have a running backup. - */ - if (sessionBackupState != SESSION_BACKUP_RUNNING) - return; - - WALInsertLockAcquireExclusive(); - Assert(XLogCtl->Insert.runningBackups > 0); - XLogCtl->Insert.runningBackups--; - - if (XLogCtl->Insert.runningBackups == 0) - { - XLogCtl->Insert.forcePageWrites = false; - } - - sessionBackupState = SESSION_BACKUP_NONE; - WALInsertLockRelease(); - - if (emit_warning) - ereport(WARNING, - (errmsg("aborting backup due to backend exiting before pg_backup_stop was called"))); -} - -/* - * Register a handler that will warn about unterminated backups at end of - * session, unless this has already been done. - */ -void -register_persistent_abort_backup_handler(void) -{ - static bool already_done = false; - - if (already_done) - return; - before_shmem_exit(do_pg_abort_backup, DatumGetBool(true)); - already_done = true; -} - /* * Get latest WAL insert pointer */ @@ -8974,6 +8266,15 @@ IsInstallXLogFileSegmentActive(void) return result; } +/* + * Get the ControlFile. 
+ */ + ControlFileData * + GetControlFile(void) + { + return ControlFile; + } + /* * Update the WalWriterSleeping flag. */ @@ -8984,3 +8285,101 @@ SetWalWriterSleeping(bool sleeping) XLogCtl->WalWriterSleeping = sleeping; SpinLockRelease(&XLogCtl->info_lck); } + +/* + * Set the forcePageWrites flag. + */ +void +SetforcePageWrites(bool need_lock, bool value) +{ + if (need_lock) + WALInsertLockAcquireExclusive(); + + XLogCtl->Insert.forcePageWrites = value; + + if (need_lock) + WALInsertLockRelease(); +} + +/* + * Set the runningBackups value. + */ +void +SetrunningBackups(bool need_lock, int value) +{ + if (need_lock) + WALInsertLockAcquireExclusive(); + + XLogCtl->Insert.runningBackups = value; + + if (need_lock) + WALInsertLockRelease(); +} + +/* + * Get the runningBackups value. + */ +int +GetrunningBackups(bool need_lock) +{ + int value; + + if (need_lock) + WALInsertLockAcquireExclusive(); + + value = XLogCtl->Insert.runningBackups; + + if (need_lock) + WALInsertLockRelease(); + + return value; +} + +/* + * Get the lastFpwDisableRecPtr. + */ +XLogRecPtr +GetlastFpwDisableRecPtr(void) +{ + XLogRecPtr recptr; + + SpinLockAcquire(&XLogCtl->info_lck); + recptr = XLogCtl->lastFpwDisableRecPtr; + SpinLockRelease(&XLogCtl->info_lck); + + return recptr; +} + +/* + * Get the lastBackupStart. + */ +XLogRecPtr +GetlastBackupStart(bool need_lock) +{ + XLogRecPtr recptr; + + if (need_lock) + WALInsertLockAcquireExclusive(); + + recptr = XLogCtl->Insert.lastBackupStart; + + if (need_lock) + WALInsertLockRelease(); + + return recptr; +} + +/* + * Set the lastBackupStart.
+ */ +void +SetlastBackupStart(bool need_lock, XLogRecPtr recptr) +{ + if (need_lock) + WALInsertLockAcquireExclusive(); + + XLogCtl->Insert.lastBackupStart = recptr; + + if (need_lock) + WALInsertLockRelease(); +} diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c index 073678d84f..ead776af0f 100644 --- a/src/backend/access/transam/xlogbackup.c +++ b/src/backend/access/transam/xlogbackup.c @@ -13,13 +13,27 @@ #include "postgres.h" +#include <time.h> +#include <unistd.h> + #include "access/xlog.h" #include "access/xlog_internal.h" +#include "access/xlogarchive.h" #include "access/xlogbackup.h" +#include "access/xloginsert.h" +#include "backup/basebackup.h" +#include "catalog/pg_control.h" +#include "common/file_utils.h" #include "funcapi.h" +#include "miscadmin.h" +#include "postmaster/bgwriter.h" +#include "storage/ipc.h" +#include "storage/latch.h" +#include "storage/lwlock.h" #include "utils/builtins.h" #include "utils/memutils.h" #include "utils/pg_lsn.h" +#include "utils/wait_event.h" /* * Backup-related variables. @@ -27,6 +41,15 @@ static BackupState *backup_state = NULL; static StringInfo tablespace_map = NULL; +/* + * Session status of running backup, used for sanity checks in SQL-callable + * functions to start and stop backups. + */ +static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE; + +static void pg_backup_start_callback(int code, Datum arg); +static void CleanupBackupHistory(void); + /* * Build contents for backup_label or backup history file. * @@ -214,3 +237,699 @@ pg_backup_stop(PG_FUNCTION_ARGS) /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } + +/* + * Remove previous backup history files. This also retries creation of + * .ready files for any backup history files for which XLogArchiveNotify + * failed earlier. 
+ */ +static void +CleanupBackupHistory(void) +{ + DIR *xldir; + struct dirent *xlde; + char path[MAXPGPATH + sizeof(XLOGDIR)]; + + xldir = AllocateDir(XLOGDIR); + + while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) + { + if (IsBackupHistoryFileName(xlde->d_name)) + { + if (XLogArchiveCheckDone(xlde->d_name)) + { + elog(DEBUG2, "removing WAL backup history file \"%s\"", + xlde->d_name); + snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name); + unlink(path); + XLogArchiveCleanup(xlde->d_name); + } + } + } + + FreeDir(xldir); +} + +/* + * do_pg_backup_start is the workhorse of the user-visible pg_backup_start() + * function. It creates the necessary starting checkpoint and constructs the + * backup state and tablespace map. + * + * Input parameters are "state" (the backup state), "fast" (if true, we do + * the checkpoint in immediate mode to make it faster), and "tablespaces" + * (if non-NULL, indicates a list of tablespaceinfo structs describing the + * cluster's tablespaces.). + * + * The tablespace map contents are appended to passed-in parameter + * tblspcmapfile and the caller is responsible for including it in the backup + * archive as 'tablespace_map'. The tablespace_map file is required mainly for + * tar format in windows as native windows utilities are not able to create + * symlinks while extracting files from tar. However for consistency and + * platform-independence, we do it the same way everywhere. + * + * It fills in "state" with the information required for the backup, such + * as the minimum WAL location that must be present to restore from this + * backup (startpoint) and the corresponding timeline ID (starttli). + * + * Every successfully started backup must be stopped by calling + * do_pg_backup_stop() or do_pg_abort_backup(). There can be many + * backups active at the same time. + * + * It is the responsibility of the caller of this function to verify the + * permissions of the calling user!
+ */ +void +do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces, + BackupState *state, StringInfo tblspcmapfile) +{ + bool backup_started_in_recovery = false; + int runningBackups; + ControlFileData *ControlFile = GetControlFile(); + + Assert(state != NULL); + backup_started_in_recovery = RecoveryInProgress(); + + /* + * During recovery, we don't need to check WAL level. Because, if WAL + * level is not sufficient, it's impossible to get here during recovery. + */ + if (!backup_started_in_recovery && !XLogIsNeeded()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL level not sufficient for making an online backup"), + errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); + + if (strlen(backupidstr) > MAXPGPATH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("backup label too long (max %d bytes)", + MAXPGPATH))); + + memcpy(state->name, backupidstr, strlen(backupidstr)); + + /* + * Mark backup active in shared memory. We must do full-page WAL writes + * during an on-line backup even if not doing so at other times, because + * it's quite possible for the backup dump to obtain a "torn" (partially + * written) copy of a database page if it reads the page concurrently with + * our write to the same page. This can be fixed as long as the first + * write to the page in the WAL sequence is a full-page write. Hence, we + * turn on forcePageWrites and then force a CHECKPOINT, to ensure there + * are no dirty pages in shared memory that might get dumped while the + * backup is in progress without having a corresponding WAL record. (Once + * the backup is complete, we need not force full-page writes anymore, + * since we expect that any pages not modified during the backup interval + * must have been correctly captured by the backup.) + * + * Note that forcePageWrites has no effect during an online backup from + * the standby. 
+ * + * We must hold all the insertion locks to change the value of + * forcePageWrites, to ensure adequate interlocking against + * XLogInsertRecord(). + */ + WALInsertLockAcquireExclusive(); + runningBackups = GetrunningBackups(false); + SetrunningBackups(false, ++runningBackups); + SetforcePageWrites(false, true); + WALInsertLockRelease(); + + /* Ensure we release forcePageWrites if fail below */ + PG_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); + { + bool gotUniqueStartpoint = false; + DIR *tblspcdir; + struct dirent *de; + tablespaceinfo *ti; + int datadirpathlen; + + /* + * Force an XLOG file switch before the checkpoint, to ensure that the + * WAL segment the checkpoint is written to doesn't contain pages with + * old timeline IDs. That would otherwise happen if you called + * pg_backup_start() right after restoring from a PITR archive: the + * first WAL segment containing the startup checkpoint has pages in + * the beginning with the old timeline ID. That can cause trouble at + * recovery: we won't have a history file covering the old timeline if + * pg_wal directory was not included in the base backup and the WAL + * archive was cleared too before starting the backup. + * + * This also ensures that we have emitted a WAL page header that has + * XLP_BKP_REMOVABLE off before we emit the checkpoint record. + * Therefore, if a WAL archiver (such as pglesslog) is trying to + * compress out removable backup blocks, it won't remove any that + * occur after this point. + * + * During recovery, we skip forcing XLOG file switch, which means that + * the backup taken during recovery is not available for the special + * recovery case described above. + */ + if (!backup_started_in_recovery) + RequestXLogSwitch(false); + + do + { + bool checkpointfpw; + + /* + * Force a CHECKPOINT. 
Aside from being necessary to prevent torn + * page problems, this guarantees that two successive backup runs + * will have different checkpoint positions and hence different + * history file names, even if nothing happened in between. + * + * During recovery, establish a restartpoint if possible. We use + * the last restartpoint as the backup starting checkpoint. This + * means that two successive backup runs can have same checkpoint + * positions. + * + * Since the fact that we are executing do_pg_backup_start() + * during recovery means that checkpointer is running, we can use + * RequestCheckpoint() to establish a restartpoint. + * + * We use CHECKPOINT_IMMEDIATE only if requested by user (via + * passing fast = true). Otherwise this can take awhile. + */ + RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT | + (fast ? CHECKPOINT_IMMEDIATE : 0)); + + /* + * Now we need to fetch the checkpoint record location, and also + * its REDO pointer. The oldest point in WAL that would be needed + * to restore starting from the checkpoint is precisely the REDO + * pointer. + */ + LWLockAcquire(ControlFileLock, LW_SHARED); + state->checkpointloc = ControlFile->checkPoint; + state->startpoint = ControlFile->checkPointCopy.redo; + state->starttli = ControlFile->checkPointCopy.ThisTimeLineID; + checkpointfpw = ControlFile->checkPointCopy.fullPageWrites; + LWLockRelease(ControlFileLock); + + if (backup_started_in_recovery) + { + XLogRecPtr recptr; + + /* + * Check to see if all WAL replayed during online backup + * (i.e., since last restartpoint used as backup starting + * checkpoint) contain full-page writes. + */ + recptr = GetlastFpwDisableRecPtr(); + + if (!checkpointfpw || state->startpoint <= recptr) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL generated with full_page_writes=off was replayed " + "since last restartpoint"), + errhint("This means that the backup being taken on the standby " + "is corrupt and should not be used. 
" + "Enable full_page_writes and run CHECKPOINT on the primary, " + "and then try an online backup again."))); + + /* + * During recovery, since we don't use the end-of-backup WAL + * record and don't write the backup history file, the + * starting WAL location doesn't need to be unique. This means + * that two base backups started at the same time might use + * the same checkpoint as starting locations. + */ + gotUniqueStartpoint = true; + } + + /* + * If two base backups are started at the same time (in WAL sender + * processes), we need to make sure that they use different + * checkpoints as starting locations, because we use the starting + * WAL location as a unique identifier for the base backup in the + * end-of-backup WAL record and when we write the backup history + * file. Perhaps it would be better generate a separate unique ID + * for each backup instead of forcing another checkpoint, but + * taking a checkpoint right after another is not that expensive + * either because only few buffers have been dirtied yet. + */ + WALInsertLockAcquireExclusive(); + if (GetlastBackupStart(false) < state->startpoint) + { + SetlastBackupStart(false, state->startpoint); + gotUniqueStartpoint = true; + } + WALInsertLockRelease(); + } while (!gotUniqueStartpoint); + + /* + * Construct tablespace_map file. + */ + datadirpathlen = strlen(DataDir); + + /* Collect information about all tablespaces */ + tblspcdir = AllocateDir("pg_tblspc"); + while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) + { + char fullpath[MAXPGPATH + 10]; + char linkpath[MAXPGPATH]; + char *relpath = NULL; + int rllen; + StringInfoData escapedpath; + char *s; + + /* Skip anything that doesn't look like a tablespace */ + if (strspn(de->d_name, "0123456789") != strlen(de->d_name)) + continue; + + snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); + + /* + * Skip anything that isn't a symlink/junction. 
For testing only, + * we sometimes use allow_in_place_tablespaces to create + * directories directly under pg_tblspc, which would fail below. + */ + if (get_dirent_type(fullpath, de, false, ERROR) != PGFILETYPE_LNK) + continue; + + rllen = readlink(fullpath, linkpath, sizeof(linkpath)); + if (rllen < 0) + { + ereport(WARNING, + (errmsg("could not read symbolic link \"%s\": %m", + fullpath))); + continue; + } + else if (rllen >= sizeof(linkpath)) + { + ereport(WARNING, + (errmsg("symbolic link \"%s\" target is too long", + fullpath))); + continue; + } + linkpath[rllen] = '\0'; + + /* + * Build a backslash-escaped version of the link path to include + * in the tablespace map file. + */ + initStringInfo(&escapedpath); + for (s = linkpath; *s; s++) + { + if (*s == '\n' || *s == '\r' || *s == '\\') + appendStringInfoChar(&escapedpath, '\\'); + appendStringInfoChar(&escapedpath, *s); + } + + /* + * Relpath holds the relative path of the tablespace directory + * when it's located within PGDATA, or NULL if it's located + * elsewhere. + */ + if (rllen > datadirpathlen && + strncmp(linkpath, DataDir, datadirpathlen) == 0 && + IS_DIR_SEP(linkpath[datadirpathlen])) + relpath = linkpath + datadirpathlen + 1; + + ti = palloc(sizeof(tablespaceinfo)); + ti->oid = pstrdup(de->d_name); + ti->path = pstrdup(linkpath); + ti->rpath = relpath ? pstrdup(relpath) : NULL; + ti->size = -1; + + if (tablespaces) + *tablespaces = lappend(*tablespaces, ti); + + appendStringInfo(tblspcmapfile, "%s %s\n", + ti->oid, escapedpath.data); + + pfree(escapedpath.data); + } + FreeDir(tblspcdir); + + state->starttime = (pg_time_t) time(NULL); + } + PG_END_ENSURE_ERROR_CLEANUP(pg_backup_start_callback, (Datum) 0); + + state->started_in_recovery = backup_started_in_recovery; + + /* + * Mark that the start phase has correctly finished for the backup. 
+ */ + sessionBackupState = SESSION_BACKUP_RUNNING; +} + +/* Error cleanup callback for pg_backup_start */ +static void +pg_backup_start_callback(int code, Datum arg) +{ + int runningBackups; + + /* Update backup counters and forcePageWrites on failure */ + WALInsertLockAcquireExclusive(); + runningBackups = GetrunningBackups(false); + Assert(runningBackups > 0); + SetrunningBackups(false, --runningBackups); + runningBackups = GetrunningBackups(false); + + if (runningBackups == 0) + SetforcePageWrites(false, false); + + WALInsertLockRelease(); +} + +/* + * Utility routine to fetch the session-level status of a backup running. + */ +SessionBackupState +get_backup_status(void) +{ + return sessionBackupState; +} + +/* + * do_pg_backup_stop + * + * Utility function called at the end of an online backup. It creates history + * file (if required), resets sessionBackupState and so on. It can optionally + * wait for WAL segments to be archived. + * + * "state" is filled with the information necessary to restore from this + * backup with its stop LSN (stoppoint), its timeline ID (stoptli), etc. + * + * It is the responsibility of the caller of this function to verify the + * permissions of the calling user! + */ +void +do_pg_backup_stop(BackupState *state, bool waitforarchive) +{ + bool backup_stopped_in_recovery = false; + char histfilepath[MAXPGPATH]; + char lastxlogfilename[MAXFNAMELEN]; + char histfilename[MAXFNAMELEN]; + XLogSegNo _logSegNo; + FILE *fp; + int seconds_before_warning; + int waits = 0; + bool reported_waiting = false; + int runningBackups; + ControlFileData *ControlFile = GetControlFile(); + + Assert(state != NULL); + + backup_stopped_in_recovery = RecoveryInProgress(); + + /* + * During recovery, we don't need to check WAL level. Because, if WAL + * level is not sufficient, it's impossible to get here during recovery. 
+ */ + if (!backup_stopped_in_recovery && !XLogIsNeeded()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL level not sufficient for making an online backup"), + errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); + + /* + * OK to update backup counters, forcePageWrites, and session-level lock. + * + * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them. + * Otherwise they can be updated inconsistently, and which might cause + * do_pg_abort_backup() to fail. + */ + WALInsertLockAcquireExclusive(); + + /* + * It is expected that each do_pg_backup_start() call is matched by + * exactly one do_pg_backup_stop() call. + */ + runningBackups = GetrunningBackups(false); + Assert(runningBackups > 0); + SetrunningBackups(false, --runningBackups); + runningBackups = GetrunningBackups(false); + + if (runningBackups == 0) + SetforcePageWrites(false, false); + + /* + * Clean up session-level lock. + * + * You might think that WALInsertLockRelease() can be called before + * cleaning up session-level lock because session-level lock doesn't need + * to be protected with WAL insertion lock. But since + * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be + * cleaned up before it. + */ + sessionBackupState = SESSION_BACKUP_NONE; + + WALInsertLockRelease(); + + /* + * If we are taking an online backup from the standby, we confirm that the + * standby has not been promoted during the backup. + */ + if (state->started_in_recovery && !backup_stopped_in_recovery) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("the standby was promoted during online backup"), + errhint("This means that the backup being taken is corrupt " + "and should not be used. " + "Try taking another online backup."))); + + /* + * During recovery, we don't write an end-of-backup record. 
We assume that + * pg_control was backed up last and its minimum recovery point can be + * available as the backup end location. Since we don't have an + * end-of-backup record, we use the pg_control value to check whether + * we've reached the end of backup when starting recovery from this + * backup. We have no way of checking if pg_control wasn't backed up last + * however. + * + * We don't force a switch to new WAL file but it is still possible to + * wait for all the required files to be archived if waitforarchive is + * true. This is okay if we use the backup to start a standby and fetch + * the missing WAL using streaming replication. But in the case of an + * archive recovery, a user should set waitforarchive to true and wait for + * them to be archived to ensure that all the required files are + * available. + * + * We return the current minimum recovery point as the backup end + * location. Note that it can be greater than the exact backup end + * location if the minimum recovery point is updated after the backup of + * pg_control. This is harmless for current uses. + * + * XXX currently a backup history file is for informational and debug + * purposes only. It's not essential for an online backup. Furthermore, + * even if it's created, it will not be archived during recovery because + * an archiver is not invoked. So it doesn't seem worthwhile to write a + * backup history file during recovery. + */ + if (backup_stopped_in_recovery) + { + XLogRecPtr recptr; + + /* + * Check to see if all WAL replayed during online backup contain + * full-page writes. + */ + recptr = GetlastFpwDisableRecPtr(); + + if (state->startpoint <= recptr) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("WAL generated with full_page_writes=off was replayed " + "during online backup"), + errhint("This means that the backup being taken on the standby " + "is corrupt and should not be used. 
" + "Enable full_page_writes and run CHECKPOINT on the primary, " + "and then try an online backup again."))); + + + LWLockAcquire(ControlFileLock, LW_SHARED); + state->stoppoint = ControlFile->minRecoveryPoint; + state->stoptli = ControlFile->minRecoveryPointTLI; + LWLockRelease(ControlFileLock); + } + else + { + char *history_file; + + /* + * Write the backup-end xlog record + */ + XLogBeginInsert(); + XLogRegisterData((char *) (&state->startpoint), + sizeof(state->startpoint)); + state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END); + + /* + * Given that we're not in recovery, InsertTimeLineID is set and can't + * change, so we can read it without a lock. + */ + state->stoptli = GetWALInsertionTimeLine(); + + /* + * Force a switch to a new xlog segment file, so that the backup is + * valid as soon as archiver moves out the current segment file. + */ + RequestXLogSwitch(false); + + XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); + state->stoptime = (pg_time_t) time(NULL); + + /* + * Write the backup history file + */ + XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); + BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo, + state->startpoint, wal_segment_size); + fp = AllocateFile(histfilepath, "w"); + if (!fp) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", + histfilepath))); + + /* Build and save the contents of the backup history file */ + history_file = build_backup_content(state, true); + fprintf(fp, "%s", history_file); + pfree(history_file); + + if (fflush(fp) || ferror(fp) || FreeFile(fp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", + histfilepath))); + + /* + * Clean out any no-longer-needed history files. As a side effect, + * this will post a .ready file for the newly created history file, + * notifying the archiver that history file may be archived + * immediately. 
+ */ + CleanupBackupHistory(); + } + + /* + * If archiving is enabled, wait for all the required WAL files to be + * archived before returning. If archiving isn't enabled, the required WAL + * needs to be transported via streaming replication (hopefully with + * wal_keep_size set high enough), or some more exotic mechanism like + * polling and copying files from pg_wal with script. We have no knowledge + * of those mechanisms, so it's up to the user to ensure that he gets all + * the required WAL. + * + * We wait until both the last WAL file filled during backup and the + * history file have been archived, and assume that the alphabetic sorting + * property of the WAL files ensures any earlier WAL files are safely + * archived as well. + * + * We wait forever, since archive_command is supposed to work and we + * assume the admin wanted his backup to work completely. If you don't + * wish to wait, then either waitforarchive should be passed in as false, + * or you can set statement_timeout. Also, some notices are issued to + * clue in anyone who might be doing this interactively. 
+ */ + + if (waitforarchive && + ((!backup_stopped_in_recovery && XLogArchivingActive()) || + (backup_stopped_in_recovery && XLogArchivingAlways()))) + { + XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size); + XLogFileName(lastxlogfilename, state->stoptli, _logSegNo, + wal_segment_size); + + XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size); + BackupHistoryFileName(histfilename, state->stoptli, _logSegNo, + state->startpoint, wal_segment_size); + + seconds_before_warning = 60; + waits = 0; + + while (XLogArchiveIsBusy(lastxlogfilename) || + XLogArchiveIsBusy(histfilename)) + { + CHECK_FOR_INTERRUPTS(); + + if (!reported_waiting && waits > 5) + { + ereport(NOTICE, + (errmsg("base backup done, waiting for required WAL segments to be archived"))); + reported_waiting = true; + } + + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + 1000L, + WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE); + ResetLatch(MyLatch); + + if (++waits >= seconds_before_warning) + { + seconds_before_warning *= 2; /* This wraps in >10 years... */ + ereport(WARNING, + (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)", + waits), + errhint("Check that your archive_command is executing properly. " + "You can safely cancel this backup, " + "but the database backup will not be usable without all the WAL segments."))); + } + } + + ereport(NOTICE, + (errmsg("all required WAL segments have been archived"))); + } + else if (waitforarchive) + ereport(NOTICE, + (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup"))); +} + +/* + * do_pg_abort_backup: abort a running backup + * + * This does just the most basic steps of do_pg_backup_stop(), by taking the + * system out of backup mode, thus making it a lot more safe to call from + * an error handler. 
+ * + * The caller can pass 'arg' as 'true' or 'false' to control whether a warning + * is emitted. + * + * NB: This gets used as a before_shmem_exit handler, hence the odd-looking + * signature. + */ +void +do_pg_abort_backup(int code, Datum arg) +{ + bool emit_warning = DatumGetBool(arg); + int runningBackups; + + /* + * Quick exit if session does not have a running backup. + */ + if (sessionBackupState != SESSION_BACKUP_RUNNING) + return; + + WALInsertLockAcquireExclusive(); + runningBackups = GetrunningBackups(false); + Assert(runningBackups > 0); + SetrunningBackups(false, --runningBackups); + runningBackups = GetrunningBackups(false); + + if (runningBackups == 0) + SetforcePageWrites(false, false); + + sessionBackupState = SESSION_BACKUP_NONE; + WALInsertLockRelease(); + + if (emit_warning) + ereport(WARNING, + (errmsg("aborting backup due to backend exiting before pg_backup_stop was called"))); +} + +/* + * Register a handler that will warn about unterminated backups at end of + * session, unless this has already been done. 
+ */
+void
+register_persistent_abort_backup_handler(void)
+{
+	static bool already_done = false;	/* set once the handler is registered */
+
+	if (already_done)
+		return;
+	before_shmem_exit(do_pg_abort_backup, BoolGetDatum(true));	/* arg: emit the warning */
+	already_done = true;
+}
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index b41e682664..cf0a534674 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -36,6 +36,7 @@ #include "access/xact.h" #include "access/xlog_internal.h" #include "access/xlogarchive.h" +#include "access/xlogbackup.h" #include "access/xlogprefetcher.h" #include "access/xlogreader.h" #include "access/xlogrecovery.h" diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index dce265098e..2a89dc7146 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -11,12 +11,11 @@ #ifndef XLOG_H #define XLOG_H -#include "access/xlogbackup.h" #include "access/xlogdefs.h" #include "access/xlogreader.h" +#include "catalog/pg_control.h" #include "datatype/timestamp.h" #include "lib/stringinfo.h" -#include "nodes/pg_list.h" /* Sync methods */ @@ -258,42 +257,25 @@ extern void SetInstallXLogFileSegmentActive(void); extern bool IsInstallXLogFileSegmentActive(void); extern void XLogShutdownWalRcv(void); -/* - * Routines to start, stop, and get status of a base backup. - */ +extern void WALInsertLockAcquire(void); +extern void WALInsertLockAcquireExclusive(void); +extern void WALInsertLockRelease(void); +extern void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt); /* - * Session-level status of base backups - * - * This is used in parallel with the shared memory status to control parallel - * execution of base backup functions for a given session, be it a backend - * dedicated to replication or a normal backend connected to a database. 
The - * update of the session-level status happens at the same time as the shared - * memory counters to keep a consistent global and local state of the backups - * running. + * Routines used by xlogbackup.c to call back into xlog.c during backup. */ -typedef enum SessionBackupState -{ - SESSION_BACKUP_NONE, - SESSION_BACKUP_RUNNING, -} SessionBackupState; - -extern void do_pg_backup_start(const char *backupidstr, bool fast, - List **tablespaces, BackupState *state, - StringInfo tblspcmapfile); -extern void do_pg_backup_stop(BackupState *state, bool waitforarchive); -extern void do_pg_abort_backup(int code, Datum arg); -extern void register_persistent_abort_backup_handler(void); -extern SessionBackupState get_backup_status(void); +extern void SetforcePageWrites(bool need_lock, bool value); +extern void SetrunningBackups(bool need_lock, int value); +extern int GetrunningBackups(bool need_lock); +extern XLogRecPtr GetlastFpwDisableRecPtr(void); +extern XLogRecPtr GetlastBackupStart(bool need_lock); +extern void SetlastBackupStart(bool need_lock, XLogRecPtr recptr); +extern ControlFileData *GetControlFile(void); /* File path names (all relative to $PGDATA) */ #define RECOVERY_SIGNAL_FILE "recovery.signal" #define STANDBY_SIGNAL_FILE "standby.signal" -#define BACKUP_LABEL_FILE "backup_label" -#define BACKUP_LABEL_OLD "backup_label.old" - -#define TABLESPACE_MAP "tablespace_map" -#define TABLESPACE_MAP_OLD "tablespace_map.old" /* files to signal promotion to primary */ #define PROMOTE_SIGNAL_FILE "promote" diff --git a/src/include/access/xlogbackup.h b/src/include/access/xlogbackup.h index 8ec3d88b0a..4f77cc2fd0 100644 --- a/src/include/access/xlogbackup.h +++ b/src/include/access/xlogbackup.h @@ -15,8 +15,15 @@ #define XLOG_BACKUP_H #include "access/xlogdefs.h" +#include "nodes/pg_list.h" #include "pgtime.h" +/* File path names (all relative to $PGDATA) */ +#define BACKUP_LABEL_FILE "backup_label" +#define BACKUP_LABEL_OLD "backup_label.old" +#define TABLESPACE_MAP 
"tablespace_map" +#define TABLESPACE_MAP_OLD "tablespace_map.old" + /* Structure to hold backup state. */ typedef struct BackupState { @@ -35,7 +42,34 @@ typedef struct BackupState pg_time_t stoptime; /* backup stop time */ } BackupState; +/* + * Session-level status of base backups + * + * This is used in parallel with the shared memory status to control parallel + * execution of base backup functions for a given session, be it a backend + * dedicated to replication or a normal backend connected to a database. The + * update of the session-level status happens at the same time as the shared + * memory counters to keep a consistent global and local state of the backups + * running. + */ +typedef enum SessionBackupState +{ + SESSION_BACKUP_NONE, + SESSION_BACKUP_RUNNING, +} SessionBackupState; + extern char *build_backup_content(BackupState *state, bool ishistoryfile); +/* + * Routines to start, stop, and get status of a base backup. + */ +extern void do_pg_backup_start(const char *backupidstr, bool fast, + List **tablespaces, BackupState *state, + StringInfo tblspcmapfile); +extern void do_pg_backup_stop(BackupState *state, bool waitforarchive); +extern void do_pg_abort_backup(int code, Datum arg); +extern void register_persistent_abort_backup_handler(void); +extern SessionBackupState get_backup_status(void); + #endif /* XLOG_BACKUP_H */ -- 2.34.1