On Fri, Jan 20, 2012 at 3:43 AM, Fujii Masao <masao.fu...@gmail.com> wrote:
Requested update -- Simon Riggs http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ce659ec..469e6d6 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -40,6 +40,7 @@ #include "pgstat.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" +#include "postmaster/walrestore.h" #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" @@ -187,7 +188,6 @@ static bool InArchiveRecovery = false; static bool restoredFromArchive = false; /* options taken from recovery.conf for archive recovery */ -static char *recoveryRestoreCommand = NULL; static char *recoveryEndCommand = NULL; static char *archiveCleanupCommand = NULL; static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET; @@ -575,8 +575,8 @@ bool reachedConsistency = false; static bool InRedo = false; -/* Have we launched bgwriter during recovery? */ -static bool bgwriterLaunched = false; +/* Have we launched background procs during archive recovery yet? */ +static bool ArchRecoveryBgProcsActive = false; /* * Information logged when we detect a change in one of the parameters @@ -632,8 +632,6 @@ static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, bool randAccess); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); -static bool RestoreArchivedFile(char *path, const char *xlogfname, - const char *recovername, off_t expectedSize); static void ExecuteRecoveryCommand(char *command, char *commandName, bool failOnerror); static void PreallocXlogFiles(XLogRecPtr endptr); @@ -2706,19 +2704,47 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, XLogFileName(xlogfname, tli, log, seg); +#define TMPRECOVERYXLOG "RECOVERYXLOG" + switch (source) { case XLOG_FROM_ARCHIVE: + /* + * Check to see if the WALRestore process has already put the + * next file in place while we were working. If so, use that. + * If not, get it ourselves. 
This makes it easier to handle + * initial state before the WALRestore is active, and also + * handles the stop/start logic correctly when we have both + * streaming and file based replication active. + * + * We queue up the next task for WALRestore after we've begun to + * use this file later in XLogFileRead(). + * + * If the WALRestore process is still active, the lock wait makes + * us wait, which is just like we were executing the command + * ourselves and so doesn't alter the logic elsewhere. + */ + if (XLogFileIsNowFullyRestored(tli, log, seg)) + { + snprintf(path, MAXPGPATH, XLOGDIR "/%s", TMPRECOVERYXLOG); + restoredFromArchive = true; + break; + } + /* Report recovery progress in PS display */ snprintf(activitymsg, sizeof(activitymsg), "waiting for %s", xlogfname); set_ps_display(activitymsg, false); restoredFromArchive = RestoreArchivedFile(path, xlogfname, - "RECOVERYXLOG", + TMPRECOVERYXLOG, XLogSegSize); + if (!restoredFromArchive) + { + LWLockRelease(WALRestoreCommandLock); return -1; + } break; case XLOG_FROM_PG_XLOG: @@ -2748,18 +2774,42 @@ XLogFileRead(uint32 log, uint32 seg, int emode, TimeLineID tli, if (stat(xlogfpath, &statbuf) == 0) { if (unlink(xlogfpath) != 0) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", xlogfpath))); + } reload = true; } if (rename(path, xlogfpath) < 0) + { + LWLockRelease(WALRestoreCommandLock); ereport(ERROR, (errcode_for_file_access(), errmsg("could not rename file \"%s\" to \"%s\": %m", path, xlogfpath))); + } + + /* + * Make sure we recover from the new filename, so we can reuse the + * temporary filename for asynchronous restore actions. + */ + strcpy(path, xlogfpath); + + /* + * Tell the WALRestore process to get the next file now. + * Hopefully it will be ready for use in time for the next call the + * Startup process makes to XLogFileRead(). 
+ * + * It might seem like we should do that earlier but then there is a + * race condition that might lead to replacing RECOVERYXLOG with + * another file before we've copied it. + */ + SetNextWALRestoreLogSeg(tli, log, seg); + LWLockRelease(WALRestoreCommandLock); /* * If the existing segment was replaced, since walsenders might have @@ -2911,8 +2961,11 @@ XLogFileClose(void) * For fixed-size files, the caller may pass the expected size as an * additional crosscheck on successful recovery. If the file size is not * known, set expectedSize = 0. + * + * Must be called with WALRestoreCommandLock held and must be held at exit, + * if the function returns. */ -static bool +bool RestoreArchivedFile(char *path, const char *xlogfname, const char *recovername, off_t expectedSize) { @@ -2929,7 +2982,7 @@ RestoreArchivedFile(char *path, const char *xlogfname, uint32 restartSeg; /* In standby mode, restore_command might not be supplied */ - if (recoveryRestoreCommand == NULL) + if (strlen(GetRecoveryRestoreCommand()) == 0) goto not_available; /* @@ -2963,18 +3016,24 @@ RestoreArchivedFile(char *path, const char *xlogfname, if (stat(xlogpath, &stat_buf) != 0) { if (errno != ENOENT) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", xlogpath))); + } } else { if (unlink(xlogpath) != 0) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not remove file \"%s\": %m", xlogpath))); + } } /* @@ -3013,7 +3072,7 @@ RestoreArchivedFile(char *path, const char *xlogfname, endp = xlogRestoreCmd + MAXPGPATH - 1; *endp = '\0'; - for (sp = recoveryRestoreCommand; *sp; sp++) + for (sp = GetRecoveryRestoreCommand(); *sp; sp++) { if (*sp == '%') { @@ -3059,21 +3118,29 @@ RestoreArchivedFile(char *path, const char *xlogfname, } *dp = '\0'; - ereport(DEBUG3, + ereport(DEBUG2, (errmsg_internal("executing restore command \"%s\"", xlogRestoreCmd))); /* - * Check signals 
before restore command and reset afterwards. + * Set in_restore_command to tell the signal handler that we should exit + * right away on SIGTERM. We know that we're at a safe point to do that. + * Check if we had already received the signal, so that we don't miss a + * shutdown request received just before this. */ - PreRestoreCommand(); + in_restore_command = true; + if (startup_shutdown_requested || walrestore_shutdown_requested) + { + LWLockRelease(WALRestoreCommandLock); + proc_exit(1); + } /* * Copy xlog from archival storage to XLOGDIR */ rc = system(xlogRestoreCmd); - PostRestoreCommand(); + in_restore_command = false; if (rc == 0) { @@ -3102,7 +3169,10 @@ RestoreArchivedFile(char *path, const char *xlogfname, if (StandbyMode && stat_buf.st_size < expectedSize) elevel = DEBUG1; else + { + LWLockRelease(WALRestoreCommandLock); elevel = FATAL; + } ereport(elevel, (errmsg("archive file \"%s\" has wrong size: %lu instead of %lu", xlogfname, @@ -3123,10 +3193,13 @@ RestoreArchivedFile(char *path, const char *xlogfname, { /* stat failed */ if (errno != ENOENT) + { + LWLockRelease(WALRestoreCommandLock); ereport(FATAL, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", xlogpath))); + } } } @@ -3158,10 +3231,18 @@ RestoreArchivedFile(char *path, const char *xlogfname, * too. */ if (WIFSIGNALED(rc) && WTERMSIG(rc) == SIGTERM) + { + LWLockRelease(WALRestoreCommandLock); proc_exit(1); + } signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125; + /* + * If signaled we will immediately issue a FATAL error so drop the lock + */ + if (signaled) + LWLockRelease(WALRestoreCommandLock); ereport(signaled ? 
FATAL : DEBUG2, (errmsg("could not restore file \"%s\" from archive: return code %d", xlogfname, rc))); @@ -4203,7 +4284,9 @@ readTimeLineHistory(TimeLineID targetTLI) if (InArchiveRecovery) { TLHistoryFileName(histfname, targetTLI); + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + LWLockRelease(WALRestoreCommandLock); } else TLHistoryFilePath(path, targetTLI); @@ -4292,7 +4375,9 @@ existsTimeLineHistory(TimeLineID probeTLI) if (InArchiveRecovery) { TLHistoryFileName(histfname, probeTLI); + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + LWLockRelease(WALRestoreCommandLock); } else TLHistoryFilePath(path, probeTLI); @@ -4453,7 +4538,9 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, if (InArchiveRecovery) { TLHistoryFileName(histfname, parentTLI); + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0); + LWLockRelease(WALRestoreCommandLock); } else TLHistoryFilePath(path, parentTLI); @@ -5299,10 +5386,10 @@ readRecoveryCommandFile(void) { if (strcmp(item->name, "restore_command") == 0) { - recoveryRestoreCommand = pstrdup(item->value); + SetRecoveryRestoreCommand(pstrdup(item->value)); ereport(DEBUG2, (errmsg_internal("restore_command = '%s'", - recoveryRestoreCommand))); + GetRecoveryRestoreCommand()))); } else if (strcmp(item->name, "recovery_end_command") == 0) { @@ -5455,7 +5542,7 @@ readRecoveryCommandFile(void) */ if (StandbyMode) { - if (PrimaryConnInfo == NULL && recoveryRestoreCommand == NULL) + if (PrimaryConnInfo == NULL && strlen(GetRecoveryRestoreCommand()) == 0) ereport(WARNING, (errmsg("recovery command file \"%s\" specified neither primary_conninfo nor restore_command", RECOVERY_COMMAND_FILE), @@ -5463,7 +5550,7 @@ readRecoveryCommandFile(void) } else { - if (recoveryRestoreCommand == NULL) + if (strlen(GetRecoveryRestoreCommand()) == 0) 
ereport(FATAL, (errmsg("recovery command file \"%s\" must specify restore_command when standby mode is not enabled", RECOVERY_COMMAND_FILE))); @@ -6432,7 +6519,7 @@ StartupXLOG(void) PublishStartupProcessInformation(); SetForwardFsyncRequests(); SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED); - bgwriterLaunched = true; + ArchRecoveryBgProcsActive = true; } /* @@ -6795,7 +6882,7 @@ StartupXLOG(void) * the rule that TLI only changes in shutdown checkpoints, which * allows some extra error checking in xlog_redo. */ - if (bgwriterLaunched) + if (ArchRecoveryBgProcsActive) RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT); @@ -9640,7 +9727,7 @@ XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt, * Request a restartpoint if we've replayed too much * xlog since the last one. */ - if (StandbyMode && bgwriterLaunched) + if (StandbyMode && ArchRecoveryBgProcsActive) { if (XLogCheckpointNeeded(readId, readSeg)) { diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index e3ae92d..81a8cb3 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -30,6 +30,7 @@ #include "nodes/makefuncs.h" #include "postmaster/bgwriter.h" #include "postmaster/startup.h" +#include "postmaster/walrestore.h" #include "postmaster/walwriter.h" #include "replication/walreceiver.h" #include "storage/bufmgr.h" @@ -319,6 +320,9 @@ AuxiliaryProcessMain(int argc, char *argv[]) case CheckpointerProcess: statmsg = "checkpointer process"; break; + case WalRestoreProcess: + statmsg = "wal restore process"; + break; case WalWriterProcess: statmsg = "wal writer process"; break; @@ -424,6 +428,11 @@ AuxiliaryProcessMain(int argc, char *argv[]) CheckpointerMain(); proc_exit(1); /* should never return */ + case WalRestoreProcess: + /* don't set signals, wal restore has its own agenda */ + WalRestoreMain(); + proc_exit(1); /* should never return */ + case WalWriterProcess: /* don't set signals, walwriter 
has its own agenda */ InitXLOGAccess(); diff --git a/src/backend/postmaster/Makefile b/src/backend/postmaster/Makefile index 3056b09..349e722 100644 --- a/src/backend/postmaster/Makefile +++ b/src/backend/postmaster/Makefile @@ -13,6 +13,6 @@ top_builddir = ../../.. include $(top_builddir)/src/Makefile.global OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \ - startup.o syslogger.o walwriter.o checkpointer.o + startup.o syslogger.o walrestore.o walwriter.o checkpointer.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index ad0c17a..15684c0 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -210,6 +210,7 @@ static pid_t StartupPID = 0, BgWriterPID = 0, CheckpointerPID = 0, WalWriterPID = 0, + WalRestorePID = 0, WalReceiverPID = 0, AutoVacPID = 0, PgArchPID = 0, @@ -470,6 +471,7 @@ static void ShmemBackendArrayRemove(Backend *bn); #define StartCheckpointer() StartChildProcess(CheckpointerProcess) #define StartWalWriter() StartChildProcess(WalWriterProcess) #define StartWalReceiver() StartChildProcess(WalReceiverProcess) +#define StartWalRestore() StartChildProcess(WalRestoreProcess) /* Macros to check exit status of a child process */ #define EXIT_STATUS_0(st) ((st) == 0) @@ -2060,6 +2062,8 @@ SIGHUP_handler(SIGNAL_ARGS) signal_child(WalWriterPID, SIGHUP); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGHUP); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGHUP); if (AutoVacPID != 0) signal_child(AutoVacPID, SIGHUP); if (PgArchPID != 0) @@ -2170,6 +2174,8 @@ pmdie(SIGNAL_ARGS) signal_child(StartupPID, SIGTERM); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGTERM); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGTERM); if (BgWriterPID != 0) signal_child(BgWriterPID, SIGTERM); if (pmState == PM_RECOVERY) @@ -2225,6 +2231,8 @@ pmdie(SIGNAL_ARGS) signal_child(WalWriterPID, 
SIGQUIT); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGQUIT); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGQUIT); if (AutoVacPID != 0) signal_child(AutoVacPID, SIGQUIT); if (PgArchPID != 0) @@ -2331,6 +2339,12 @@ reaper(SIGNAL_ARGS) pmState = PM_RUN; /* + * Shutdown the WALRestore process + */ + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGTERM); + + /* * Kill any walsenders to force the downstream standby(s) to * reread the timeline history file, adjust their timelines and * establish replication connections again. This is required @@ -2477,6 +2491,30 @@ reaper(SIGNAL_ARGS) } /* + * Was it the wal restore? If exit status is zero (normal) or one + * (FATAL exit), we assume everything is all right just like normal + * backends. + */ + if (pid == WalRestorePID) + { + if (pmState >= PM_RUN) + { + WalRestorePID = 0; + continue; + } + + /* + * Any unexpected exit (including FATAL exit) of the WALRestore + * process is treated as a crash, except that we don't want to + * reinitialize because availability is important. + */ + RecoveryError = true; + HandleChildCrash(pid, exitstatus, + _("walrestore process")); + continue; + } + + /* * Was it the autovacuum launcher? Normal exit can be ignored; we'll * start a new one at the next iteration of the postmaster's main * loop, if necessary. Any other exit condition is treated as a @@ -2756,6 +2794,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT)); } + /* Take care of the walrestore too */ + if (pid == WalRestorePID) + WalRestorePID = 0; + else if (WalRestorePID != 0 && !FatalError) + { + ereport(DEBUG2, + (errmsg_internal("sending %s to process %d", + (SendStop ? "SIGSTOP" : "SIGQUIT"), + (int) WalRestorePID))); + signal_child(WalRestorePID, (SendStop ? 
SIGSTOP : SIGQUIT)); + } + /* Take care of the autovacuum launcher too */ if (pid == AutoVacPID) AutoVacPID = 0; @@ -2916,6 +2966,8 @@ PostmasterStateMachine(void) signal_child(StartupPID, SIGTERM); if (WalReceiverPID != 0) signal_child(WalReceiverPID, SIGTERM); + if (WalRestorePID != 0) + signal_child(WalRestorePID, SIGTERM); pmState = PM_WAIT_BACKENDS; } } @@ -2940,6 +2992,7 @@ PostmasterStateMachine(void) if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && StartupPID == 0 && WalReceiverPID == 0 && + WalRestorePID == 0 && BgWriterPID == 0 && (CheckpointerPID == 0 || !FatalError) && WalWriterPID == 0 && @@ -3005,11 +3058,11 @@ PostmasterStateMachine(void) * left by now anyway; what we're really waiting for is walsenders and * archiver. * - * Walreceiver should normally be dead by now, but not when a fast - * shutdown is performed during recovery. + * Walreceiver and Walrestore should normally be dead by now, but not + * when a fast shutdown is performed during recovery. */ if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0 && - WalReceiverPID == 0) + WalReceiverPID == 0 && WalRestorePID == 0) { pmState = PM_WAIT_DEAD_END; } @@ -3036,6 +3089,7 @@ PostmasterStateMachine(void) /* These other guys should be dead already */ Assert(StartupPID == 0); Assert(WalReceiverPID == 0); + Assert(WalRestorePID == 0); Assert(BgWriterPID == 0); Assert(CheckpointerPID == 0); Assert(WalWriterPID == 0); @@ -4219,6 +4273,8 @@ sigusr1_handler(SIGNAL_ARGS) BgWriterPID = StartBackgroundWriter(); Assert(CheckpointerPID == 0); CheckpointerPID = StartCheckpointer(); + Assert(WalRestorePID == 0); + WalRestorePID = StartWalRestore(); pmState = PM_RECOVERY; } diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index ed75d09..1791feb 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -35,14 +35,14 @@ * Flags set by interrupt handlers for later service in the redo loop. 
*/ static volatile sig_atomic_t got_SIGHUP = false; -static volatile sig_atomic_t shutdown_requested = false; static volatile sig_atomic_t promote_triggered = false; +volatile sig_atomic_t startup_shutdown_requested = false; /* * Flag set when executing a restore command, to tell SIGTERM signal handler * that it's safe to just proc_exit. */ -static volatile sig_atomic_t in_restore_command = false; +volatile sig_atomic_t in_restore_command = false; /* Signal handlers */ static void startupproc_quickdie(SIGNAL_ARGS); @@ -131,9 +131,16 @@ StartupProcShutdownHandler(SIGNAL_ARGS) int save_errno = errno; if (in_restore_command) + { + /* + * See RestoreArchivedFile() for explanation of why this + * lock is always held when in_restore_command is true. + */ + LWLockRelease(WALRestoreCommandLock); proc_exit(1); + } else - shutdown_requested = true; + startup_shutdown_requested = true; WakeupRecovery(); errno = save_errno; @@ -155,7 +162,7 @@ HandleStartupProcInterrupts(void) /* * Check if we were requested to exit without finishing recovery. */ - if (shutdown_requested) + if (startup_shutdown_requested) proc_exit(1); /* @@ -226,26 +233,6 @@ StartupProcessMain(void) proc_exit(0); } -void -PreRestoreCommand(void) -{ - /* - * Set in_restore_command to tell the signal handler that we should exit - * right away on SIGTERM. We know that we're at a safe point to do that. - * Check if we had already received the signal, so that we don't miss a - * shutdown request received just before this. 
- */ - in_restore_command = true; - if (shutdown_requested) - proc_exit(1); -} - -void -PostRestoreCommand(void) -{ - in_restore_command = false; -} - bool IsPromoteTriggered(void) { diff --git a/src/backend/postmaster/walrestore.c b/src/backend/postmaster/walrestore.c new file mode 100644 index 0000000..7634d36 --- /dev/null +++ b/src/backend/postmaster/walrestore.c @@ -0,0 +1,474 @@ +/*------------------------------------------------------------------------- + * + * walrestore.c + * + * The WAL restore process is new as of Postgres 9.2, though the work it performs + * has been handled by the startup process from Postgres 8.0 until 9.1. + * + * WALRestore process executes the restore_command. If not set, it sleeps. + * The startup process no longer executes the restore_command and knows + * little about where the WAL files have come from. + * + * The WAL restore process is started by the postmaster when we enter + * PM_RECOVERY state and exits immediately after startup finishes. + * It remains alive until the postmaster commands it to terminate. + * Normal termination is by SIGTERM, which instructs restore process to exit(0). + * Like any backend, restore process will simply abort and exit on SIGQUIT. + * + * Note that the WAL restore process only executes the restore_command. + * The archive_cleanup_command is executed by the checkpointer, while the + * recovery_end_command and requests for history files are executed by the + * startup process. That is not important to the way those commands execute. + * All processes that use the restore_command must hold WALRestoreCommandLock + * before they execute it, since we definitely wish to avoid trying to get the + * same file more than once concurrently, plus we can't assume that the + * user has specified a command that would succeed if run concurrently. 
+ * + * If the WAL restore exits unexpectedly, the postmaster treats that the same + * as a backend crash: shared memory may be corrupted, so remaining backends + * should be killed by SIGQUIT and then a recovery cycle started. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/postmaster/walrestore.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <signal.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#include "access/xlog_internal.h" +#include "libpq/pqsignal.h" +#include "miscadmin.h" +#include "pgstat.h" +#include "postmaster/startup.h" +#include "postmaster/walrestore.h" +#include "storage/bufmgr.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/pmsignal.h" +#include "storage/shmem.h" +#include "utils/guc.h" +#include "utils/ps_status.h" +#include "utils/timestamp.h" + +/* XXX Set to DEBUG4 prior to patch commit */ +#define WALRSTR_DEBUG_LEVEL LOG + +/* + * GUC parameters + */ +int WalRestoreDelay = 10000; + +WalRestoreData *WalRstr = NULL; + +/* + * Flags set by interrupt handlers for later service in the main loop. + */ +static volatile sig_atomic_t got_SIGHUP = false; +volatile sig_atomic_t walrestore_shutdown_requested = false; + +/* Prototypes for private functions */ + +static bool WalRestoreNextFile(void); + +/* Signal handlers */ + +static void walrestore_quickdie(SIGNAL_ARGS); +static void WalRestoreProcSigUsr1Handler(SIGNAL_ARGS); +static void WalRestoreSigHupHandler(SIGNAL_ARGS); +static void WalRestoreShutdownHandler(SIGNAL_ARGS); + + +/* + * Main entry point for walrestore process + * + * This is invoked from AuxiliaryProcessMain, which has already created the basic + * execution environment, but not enabled signals yet. 
+ */ +void +WalRestoreMain(void) +{ + /* use volatile pointer to prevent code rearrangement */ + volatile WalRestoreData *walrstr = WalRstr; + + /* + * WalRstr should be set up already (if we are a backend, we inherit this + * by fork() or EXEC_BACKEND mechanism from the postmaster). + */ + Assert(walrstr != NULL); + + InitLatch(&walrstr->WALRestoreLatch); /* initialize latch used in main loop */ + + /* + * If possible, make this process a group leader, so that the postmaster + * can signal any child processes too. + */ +#ifdef HAVE_SETSID + if (setsid() < 0) + elog(FATAL, "setsid() failed: %m"); +#endif + + /* + * Properly accept or ignore signals the postmaster might send us + * + * SIGUSR1 is presently unused; keep it spare in case someday we want this + * process to participate in ProcSignal signalling. + */ + pqsignal(SIGHUP, WalRestoreSigHupHandler); /* set flag to read config file */ + pqsignal(SIGINT, SIG_IGN); + pqsignal(SIGTERM, WalRestoreShutdownHandler); /* shutdown */ + pqsignal(SIGQUIT, walrestore_quickdie); /* hard crash time */ + pqsignal(SIGALRM, SIG_IGN); + pqsignal(SIGPIPE, SIG_IGN); + pqsignal(SIGUSR1, WalRestoreProcSigUsr1Handler); /* reserve for ProcSignal */ + pqsignal(SIGUSR2, SIG_IGN); + + /* + * Reset some signals that are accepted by postmaster but not here + */ + pqsignal(SIGCHLD, SIG_DFL); + pqsignal(SIGTTIN, SIG_DFL); + pqsignal(SIGTTOU, SIG_DFL); + pqsignal(SIGCONT, SIG_DFL); + pqsignal(SIGWINCH, SIG_DFL); + + /* We allow SIGQUIT (quickdie) at all times */ + sigdelset(&BlockSig, SIGQUIT); + + /* + * Unblock signals (they were blocked when the postmaster forked us) + */ + PG_SETMASK(&UnBlockSig); + + /* + * Loop forever + */ + for (;;) + { + ResetLatch(&walrstr->WALRestoreLatch); + + /* + * Emergency bailout if postmaster has died. This is to avoid the + * necessity for manual cleanup of all postmaster children. 
+ */ + if (!PostmasterIsAlive()) + exit(1); + + if (got_SIGHUP) + { + got_SIGHUP = false; + ProcessConfigFile(PGC_SIGHUP); + } + + if (walrestore_shutdown_requested) + { + /* + * From here on, elog(ERROR) should end with exit(1), not send + * control back to the sigsetjmp block above + */ + ExitOnAnyError = true; + /* Normal exit from the walrestore process is here */ + proc_exit(0); /* done */ + } + + /* + * Keep restoring as long as there are files to process and we have + * not exceeded wal_keep_files + */ + if (!WalRestoreNextFile()) + { + (void) WaitLatch(&walrstr->WALRestoreLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WalRestoreDelay /* ms */); + } + } +} + +/* + * SetNextWALRestoreLogSeg - set the target for next WALrestore cycle + * + * Only called by Startup process + * + * Must be called with WALRestoreCommandLock held and must be held at exit, + * if the function returns. + */ +void +SetNextWALRestoreLogSeg(TimeLineID tli, uint32 log, uint32 seg) +{ + char xlogfname[MAXFNAMELEN]; + uint32 newlog = log; + uint32 newseg = seg; + + NextLogSeg(newlog, newseg); + + XLogFileName(xlogfname, tli, newlog, newseg); + elog(WALRSTR_DEBUG_LEVEL, "requesting restore of %s", xlogfname); + + { + /* use volatile pointer to prevent code rearrangement */ + volatile WalRestoreData *walrstr = WalRstr; + + walrstr->nextFileTli = tli; + walrstr->nextFileLog = newlog; + walrstr->nextFileSeg = newseg; + } + + SetLatch(&WalRstr->WALRestoreLatch); +} + +/* + * Run in Startup process to see if next file has arrived. We protect + * WalRstr with a LWlock so that the Startup process will wait until + * the restore_command succeeds or is cancelled. We set interrupt flags + * as if we were running the restore_command ourselves; there is no + * difference. + * + * WALRestoreCommandLock is not held on entry, but will be held at exit. 
+ */ +bool +XLogFileIsNowFullyRestored(TimeLineID tli, uint32 log, uint32 seg) +{ + /* use volatile pointer to prevent code rearrangement */ + volatile WalRestoreData *walrstr = WalRstr; + char xlogfname[MAXFNAMELEN]; + + /* + * Issue debug message before we wait for the lock, to allow + * log entries to show interleaving of Startup and WALRestore actions + */ + XLogFileName(xlogfname, tli, log, seg); + elog(WALRSTR_DEBUG_LEVEL, + "startup process requests %s from archive", xlogfname); + + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); + + XLogFileName(xlogfname, + walrstr->lastFileTli, + walrstr->lastFileLog, + walrstr->lastFileSeg); + elog(WALRSTR_DEBUG_LEVEL, + "startup process sees last file was %s", xlogfname); + + if (tli == walrstr->lastFileTli && + log == walrstr->lastFileLog && + seg == walrstr->lastFileSeg) + return true; + + return false; +} + +/* + * WalRestoreNextFile - returns true if next file was restored + * + * Broadly follows the logic in XLogFileRead() when called with source of + * XLOG_FROM_ARCHIVE, except we have to read the next file from shmem. + */ +static bool +WalRestoreNextFile(void) +{ + /* use volatile pointer to prevent code rearrangement */ + volatile WalRestoreData *walrstr = WalRstr; + char xlogfname[MAXFNAMELEN]; + char activitymsg[MAXFNAMELEN + 16]; + char path[MAXPGPATH]; + bool restoredFromArchive; + uint32 nextFileLog; + uint32 nextFileSeg; + TimeLineID nextFileTli; + + elog(WALRSTR_DEBUG_LEVEL, "walrestore checking for next file to restore"); + + LWLockAcquire(WALRestoreCommandLock, LW_EXCLUSIVE); + + { + /* use volatile pointer to prevent code rearrangement */ + volatile WalRestoreData *walrstr = WalRstr; + + nextFileTli = walrstr->nextFileTli; + nextFileLog = walrstr->nextFileLog; + nextFileSeg = walrstr->nextFileSeg; + } + + /* + * If we aren't being requested to restore a file exit quickly. 
+ */ + if (nextFileTli == walrstr->lastFileTli && + nextFileLog == walrstr->lastFileLog && + nextFileSeg == walrstr->lastFileSeg) + { + LWLockRelease(WALRestoreCommandLock); + XLogFileName(xlogfname, nextFileTli, nextFileLog, nextFileSeg); + elog(WALRSTR_DEBUG_LEVEL, + "restore of %s is already complete, so sleep", xlogfname); + return false; + } + + XLogFileName(xlogfname, nextFileTli, nextFileLog, nextFileSeg); + + /* Report recovery progress in PS display */ + snprintf(activitymsg, sizeof(activitymsg), "waiting for %s", + xlogfname); + set_ps_display(activitymsg, false); + + elog(WALRSTR_DEBUG_LEVEL, "walrestore will restore %s", xlogfname); + + restoredFromArchive = RestoreArchivedFile(path, xlogfname, + "RECOVERYXLOG", + XLogSegSize); + + if (restoredFromArchive) + { + /* use volatile pointer to prevent code rearrangement */ + volatile WalRestoreData *walrstr = WalRstr; + + walrstr->lastFileTli = nextFileTli; + walrstr->lastFileLog = nextFileLog; + walrstr->lastFileSeg = nextFileSeg; + walrstr->lastFileRestoreTime = GetCurrentTimestamp(); + } + + LWLockRelease(WALRestoreCommandLock); + + set_ps_display("", false); + + /* + * Make sure Startup process is active so it can see new file, or + * react to it not being there. 
+ */ + WakeupRecovery(); + + return restoredFromArchive; +} + +void +SetRecoveryRestoreCommand(char *cmd) +{ + if (cmd == NULL) + return; + + if (strlen(cmd) <= MAXPGPATH) + strcpy(WalRstr->recoveryRestoreCommand, cmd); + else + elog(FATAL, "recovery_restore_command is too long"); +} + +char * +GetRecoveryRestoreCommand(void) +{ + return WalRstr->recoveryRestoreCommand; +} + +/* Report shared memory space needed by WalRestoreShmemInit */ +Size +WalRestoreShmemSize(void) +{ + Size size = 0; + + size = add_size(size, sizeof(WalRestoreData)); + + return size; +} + +/* Allocate and initialize walrestore-related shared memory */ +void +WalRestoreShmemInit(void) +{ + bool found; + + WalRstr = (WalRestoreData *) + ShmemInitStruct("Wal Restore Ctl", WalRestoreShmemSize(), &found); + + if (found) + return; + + /* First time through, so initialize */ + MemSet(WalRstr, 0, WalRestoreShmemSize()); + InitSharedLatch(&WalRstr->WALRestoreLatch); + +} + +/* -------------------------------- + * signal handler routines + * -------------------------------- + */ + +/* + * walrestore_quickdie() occurs when signalled SIGQUIT by the postmaster. + * + * Some backend has bought the farm, + * so we need to stop what we're doing and exit. + */ +static void +walrestore_quickdie(SIGNAL_ARGS) +{ + PG_SETMASK(&BlockSig); + + /* + * We DO NOT want to run proc_exit() callbacks -- we're here because + * shared memory may be corrupted, so we don't want to try to clean up our + * transaction. Just nail the windows shut and get out of town. Now that + * there's an atexit callback to prevent third-party code from breaking + * things by calling exit() directly, we have to reset the callbacks + * explicitly to make this work as intended. + */ + on_exit_reset(); + + /* + * Note we do exit(2) not exit(0). This is to force the postmaster into a + * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random + * backend. 
This is necessary precisely because we don't clean up our + * shared memory state. (The "dead man switch" mechanism in pmsignal.c + * should ensure the postmaster sees this as a crash, too, but no harm in + * being doubly sure.) + */ + exit(2); +} + +/* SIGUSR1: let latch facility handle the signal */ +static void +WalRestoreProcSigUsr1Handler(SIGNAL_ARGS) +{ + int save_errno = errno; + + latch_sigusr1_handler(); + + errno = save_errno; +} + +/* SIGHUP: set flag to re-read config file at next convenient time */ +static void +WalRestoreSigHupHandler(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_SIGHUP = true; + SetLatch(&WalRstr->WALRestoreLatch); + + errno = save_errno; +} + +/* SIGTERM: set flag to shutdown and exit */ +static void +WalRestoreShutdownHandler(SIGNAL_ARGS) +{ + int save_errno = errno; + + if (in_restore_command) + { + LWLockRelease(WALRestoreCommandLock); + proc_exit(1); + } + else + walrestore_shutdown_requested = true; + SetLatch(&WalRstr->WALRestoreLatch); + + errno = save_errno; +} diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index ef1dc91..8f4443a 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -26,6 +26,7 @@ #include "postmaster/autovacuum.h" #include "postmaster/bgwriter.h" #include "postmaster/postmaster.h" +#include "postmaster/walrestore.h" #include "replication/walreceiver.h" #include "replication/walsender.h" #include "storage/bufmgr.h" @@ -123,6 +124,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, AutoVacuumShmemSize()); size = add_size(size, WalSndShmemSize()); size = add_size(size, WalRcvShmemSize()); + size = add_size(size, WalRestoreShmemSize()); size = add_size(size, BTreeShmemSize()); size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); @@ -228,6 +230,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) AutoVacuumShmemInit(); WalSndShmemInit(); WalRcvShmemInit(); + 
WalRestoreShmemInit(); /* * Set up other modules that need some shared memory space diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 1ddf4bf..e9e5325 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -270,7 +270,8 @@ extern bool XLogNeedsFlush(XLogRecPtr RecPtr); extern int XLogFileInit(uint32 log, uint32 seg, bool *use_existent, bool use_lock); extern int XLogFileOpen(uint32 log, uint32 seg); - +extern bool RestoreArchivedFile(char *path, const char *xlogfname, + const char *recovername, off_t expectedSize); extern void XLogGetLastRemoved(uint32 *log, uint32 *seg); extern void XLogSetAsyncXactLSN(XLogRecPtr record); @@ -316,6 +317,7 @@ extern TimeLineID GetRecoveryTargetTLI(void); extern bool CheckPromoteSignal(void); extern void WakeupRecovery(void); extern Latch *WALWriterLatch(void); +extern Latch *WALRestoreLatch(void); /* * Starting/stopping a base backup diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index e966a73..b90ce33 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -23,6 +23,7 @@ typedef enum StartupProcess, BgWriterProcess, CheckpointerProcess, + WalRestoreProcess, WalWriterProcess, WalReceiverProcess, diff --git a/src/include/postmaster/startup.h b/src/include/postmaster/startup.h index 3ec6950..35d9665 100644 --- a/src/include/postmaster/startup.h +++ b/src/include/postmaster/startup.h @@ -12,10 +12,11 @@ #ifndef _STARTUP_H #define _STARTUP_H +extern volatile sig_atomic_t startup_shutdown_requested; +extern volatile sig_atomic_t in_restore_command; + extern void HandleStartupProcInterrupts(void); extern void StartupProcessMain(void); -extern void PreRestoreCommand(void); -extern void PostRestoreCommand(void); extern bool IsPromoteTriggered(void); extern void ResetPromoteTriggered(void); diff --git a/src/include/postmaster/walrestore.h b/src/include/postmaster/walrestore.h new file mode 100644 index 0000000..98d7830 --- 
/dev/null +++ b/src/include/postmaster/walrestore.h @@ -0,0 +1,72 @@ +/*------------------------------------------------------------------------- + * + * walrestore.h + * Exports from postmaster/walrestore.c. + * + * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group + * + * src/include/postmaster/walrestore.h + * + *------------------------------------------------------------------------- + */ +#ifndef _WALRESTORE_H +#define _WALRESTORE_H + +#include "access/xlog.h" +#include "access/xlogdefs.h" +#include "storage/spin.h" +#include "pgtime.h" + +extern volatile sig_atomic_t walrestore_shutdown_requested; + +/* GUC options */ + +extern void WalRestoreMain(void); +extern bool XLogFileIsNowFullyRestored(TimeLineID tli, uint32 log, uint32 seg); +extern void SetNextWALRestoreLogSeg(TimeLineID tli, uint32 log, uint32 seg); +extern void SetRecoveryRestoreCommand(char *cmd); +extern char *GetRecoveryRestoreCommand(void); +extern Size WalRestoreShmemSize(void); +extern void WalRestoreShmemInit(void); + +/* Shared memory area for management of the walrestore process */ +typedef struct +{ + /* + * The identifiers (timeline, log, segment) of the last WAL file restored + * by WALrestore + */ + TimeLineID lastFileTli; + uint32 lastFileLog; + uint32 lastFileSeg; + + /* + * Time of the last restore by WALrestore + */ + TimestampTz lastFileRestoreTime; + + /* + * The next WAL file requested for the WALrestore process to restore + */ + TimeLineID nextFileTli; + uint32 nextFileLog; + uint32 nextFileSeg; + + /* + * All of the above fields are read and set only while holding + * WALRestoreCommandLock + */ + + /* + * WALRestoreLatch is used to wake up the WALRestore process to restore WAL files. 
+ */ + Latch WALRestoreLatch; + + /* + * recoveryRestoreCommand for use by walrestore; can be removed if this + * becomes a GUC. Set once at startup and read-only after that. + */ + char recoveryRestoreCommand[MAXPGPATH]; +} WalRestoreData; + +extern WalRestoreData *WalRstr; + +#endif /* _WALRESTORE_H */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index df3df29..c316dcc 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -79,6 +79,7 @@ typedef enum LWLockId SerializablePredicateLockListLock, OldSerXidLock, SyncRepLock, + WALRestoreCommandLock, /* Individual lock IDs end here */ FirstBufMappingLock, FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index 358d1a4..f994b67 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -204,12 +204,21 @@ extern PGPROC *PreparedXactProcs; /* * We set aside some extra PGPROC structures for auxiliary processes, * ie things that aren't full-fledged backends but need shmem access. + * Logger, archiver and stats processes don't count towards this total, + * nor do WALSender processes. * + * NUM_AUXILIARY_PROCS must be set to the highest of the requirements for + * normal running and recovery. + * + * During normal running we need slots for: * Background writer, checkpointer and WAL writer run during normal operation. - * Startup process and WAL receiver also consume 2 slots, but WAL writer is - * launched only after startup has exited, so we only need 4 slots. + * 3 slots + * + * During recovery we need slots for: + * Background writer, checkpointer, Startup process, WAL receiver, WAL restore. + * 5 slots */ -#define NUM_AUXILIARY_PROCS 4 +#define NUM_AUXILIARY_PROCS 5 /* configurable options */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers