Hi, > some comments on v2. Thanks for your comments. I have incorporated the changes and prepared an updated patch. Please find the details below.
> On the timeline switch, setting a flag should be enough, I don't think > that we need to wake up the archiver. Because it will just waste the > scan cycle. Yes, I have modified it. > Why do we need multi level interfaces? I mean instead of calling first > XLogArchiveNotifyTLISwitch and then calling PgArchNotifyTLISwitch, > can't we directly call PgArchNotifyTLISwitch()? Agreed, multi-level interfaces are not required. I have removed the extra interface. > + if (timeline_switch) > + { > + /* Perform a full directory scan in next cycle */ > + dirScan = true; > + timeline_switch = false; > + } > I suggest you can add some comments atop this check. I have added a comment specifying the action required in case of a timeline switch. > I think you should use %m in the error message so that it also prints > the OS error code. Done. > Why is this a global variable? I mean whenever you enter the function > pgarch_ArchiverCopyLoop(), this can be set to true and after that you > can pass this as inout parameter to pgarch_readyXlog() there in it can > be conditionally set to false once we get some segment and whenever > the timeline switch we can set it back to the true. Yes, it is not necessary for "dirScan" to have global scope. I have changed the scope of "dirScan" and "nextLogSegNo" to local. Please find attached patch v3. Thanks, Dipesh
From 76260a2ebf90fd063e06dac701e560a506b7a2b7 Mon Sep 17 00:00:00 2001 From: Dipesh Pandit <dipesh.pan...@enterprisedb.com> Date: Wed, 30 Jun 2021 14:05:58 +0530 Subject: [PATCH] mitigate directory scan for WAL archiver WAL archiver scans the status directory to identify the next WAL file that needs to be archived. This directory scan can be minimised by maintaining the log segment number of current file which is being archived and incrementing it by '1' to get the next WAL file. Archiver can check the availability of next file and in case if the file is not available then it should fall-back to directory scan to get the oldest WAL file. If there is a timeline switch then backend sends a notification to archiver. Archiver registers the timeline switch and performs a full directory scan to make sure that archiving history files takes precedence over archiving WAL files --- src/backend/access/transam/xlog.c | 8 +++ src/backend/postmaster/pgarch.c | 131 ++++++++++++++++++++++++++++++++++---- src/include/postmaster/pgarch.h | 1 + 3 files changed, 128 insertions(+), 12 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index c7c928f..baee37b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -50,6 +50,7 @@ #include "port/atomics.h" #include "port/pg_iovec.h" #include "postmaster/bgwriter.h" +#include "postmaster/pgarch.h" #include "postmaster/startup.h" #include "postmaster/walwriter.h" #include "replication/basebackup.h" @@ -8130,6 +8131,13 @@ StartupXLOG(void) WalSndWakeup(); /* + * If archiver is active, send notification that timeline has switched. + */ + if (XLogArchivingActive() && ArchiveRecoveryRequested && + IsUnderPostmaster) + PgArchNotifyTLISwitch(); + + /* * If this was a promotion, request an (online) checkpoint now. 
This isn't * required for consistency, but the last restartpoint might be far back, * and in case of a crash, recovering from it might take a longer than is diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 74a7d7c..b604966 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -90,16 +90,18 @@ static PgArchData *PgArch = NULL; * Flags set by interrupt handlers for later service in the main loop. */ static volatile sig_atomic_t ready_to_stop = false; +static volatile sig_atomic_t timeline_switch = false; /* ---------- * Local function forward declarations * ---------- */ static void pgarch_waken_stop(SIGNAL_ARGS); +static void pgarch_timeline_switch(SIGNAL_ARGS); static void pgarch_MainLoop(void); static void pgarch_ArchiverCopyLoop(void); static bool pgarch_archiveXlog(char *xlog); -static bool pgarch_readyXlog(char *xlog); +static bool pgarch_readyXlog(char *xlog, bool *dirScan, XLogSegNo *nextLogSegNo); static void pgarch_archiveDone(char *xlog); static void pgarch_die(int code, Datum arg); static void HandlePgArchInterrupts(void); @@ -169,10 +171,11 @@ PgArchiverMain(void) { /* * Ignore all signals usually bound to some action in the postmaster, - * except for SIGHUP, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT. + * except for SIGHUP, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, and SIGQUIT. */ pqsignal(SIGHUP, SignalHandlerForConfigReload); - pqsignal(SIGINT, SIG_IGN); + /* Archiver is notified by backend if there is a timeline switch */ + pqsignal(SIGINT, pgarch_timeline_switch); pqsignal(SIGTERM, SignalHandlerForShutdownRequest); /* SIGQUIT handler was already set up by InitPostmasterChild */ pqsignal(SIGALRM, SIG_IGN); @@ -221,6 +224,23 @@ PgArchWakeup(void) SetLatch(&ProcGlobal->allProcs[arch_pgprocno].procLatch); } +/* + * Called by backend process to notify a timeline switch. 
+ */ +void +PgArchNotifyTLISwitch(void) +{ + int arch_pgprocno = PgArch->pgprocno; + + if (arch_pgprocno != INVALID_PGPROCNO) + { + int archiver_pid = ProcGlobal->allProcs[arch_pgprocno].pid; + + if (kill(archiver_pid, SIGINT) < 0) + elog(ERROR, "could not notify timeline change to archiver: %m"); + } +} + /* SIGUSR2 signal handler for archiver process */ static void @@ -236,6 +256,22 @@ pgarch_waken_stop(SIGNAL_ARGS) } /* + * Interrupt handler for archiver + * + * There is a timeline switch and we have been notified by backend. + */ +static void +pgarch_timeline_switch(SIGNAL_ARGS) +{ + int save_errno = errno; + + /* Set the flag to register a timeline switch */ + timeline_switch = true; + + errno = save_errno; +} + +/* * pgarch_MainLoop * * Main loop for archiver @@ -324,18 +360,35 @@ static void pgarch_ArchiverCopyLoop(void) { char xlog[MAX_XFN_CHARS + 1]; + static XLogSegNo nextLogSegNo = 0; + static bool dirScan = true; /* * loop through all xlogs with archive_status of .ready and archive * them...mostly we expect this to be a single file, though it is possible * some backend will add files onto the list of those that need archiving * while we are still copying earlier archives + * + * "nextLogSegNo" identifies the next log file to be archived in a log + * sequence and the flag "dirScan" specifies a full directory scan to find + * the next log file. */ - while (pgarch_readyXlog(xlog)) + while (pgarch_readyXlog(xlog, &dirScan, &nextLogSegNo)) { int failures = 0; int failures_orphan = 0; + /* + * Timeline switch at backend, make sure that corresponding history + * file get archived in the next cycle. 
+ */ + if (timeline_switch) + { + /* Perform a full directory scan in next cycle */ + dirScan = true; + timeline_switch = false; + } + for (;;) { struct stat stat_buf; @@ -411,6 +464,10 @@ pgarch_ArchiverCopyLoop(void) /* successful */ pgarch_archiveDone(xlog); + /* Increment log segment number to point to the next WAL file */ + if (!IsTLHistoryFileName(xlog)) + nextLogSegNo++; + /* * Tell the collector about the WAL file that we successfully * archived @@ -596,29 +653,63 @@ pgarch_archiveXlog(char *xlog) * larger ID; the net result being that past timelines are given higher * priority for archiving. This seems okay, or at least not obviously worth * changing. + * + * WAL files are generated in a specific order of log segment number. The + * directory scan for each WAL file can be minimised by identifying the next + * WAL file in the sequence. This can be achieved by maintaining log segment + * number and timeline ID corresponding to WAL file currently being archived. + * The log segment number of current WAL file can be incremented by '1' upon + * successful archival to point to the next WAL file. The full directory scan + * can be avoided by checking the availability of next WAL file. + * + * However, a full directory scan is performed in case if there is a timeline + * switch to make sure that archiving history file takes precedence over + * archiving WAL files from older timeline. */ static bool -pgarch_readyXlog(char *xlog) +pgarch_readyXlog(char *xlog, bool *dirScan, XLogSegNo *nextLogSegNo) { - /* - * open xlog status directory and read through list of xlogs that have the - * .ready suffix, looking for earliest file. It is possible to optimise - * this code, though only a single file is expected on the vast majority - * of calls, so.... 
- */ + char basename[MAX_XFN_CHARS + 1]; + char xlogready[MAXPGPATH]; char XLogArchiveStatusDir[MAXPGPATH]; DIR *rldir; struct dirent *rlde; + struct stat st; + static TimeLineID curFileTLI = 0; bool found = false; bool historyFound = false; + if (!(*dirScan)) + { + /* + * We already have the next anticipated log segment and timeline, check + * if this WAL is ready to be archived. If yes, skip the directory + * scan. + */ + XLogFileName(basename, curFileTLI, *nextLogSegNo, wal_segment_size); + StatusFilePath(xlogready, basename, ".ready"); + + if (stat(xlogready, &st) == 0) + { + strcpy(xlog, basename); + return true; + } + } + + /* + * Fall-back to directory scan + * + * open xlog status directory and read through list of xlogs that have the + * .ready suffix, looking for earliest file. It is possible to optimise + * this code, though only a single file is expected on the vast majority + * of calls, so.... + */ snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); rldir = AllocateDir(XLogArchiveStatusDir); while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL) { int basenamelen = (int) strlen(rlde->d_name) - 6; - char basename[MAX_XFN_CHARS + 1]; bool ishistory; /* Ignore entries with unexpected number of characters */ @@ -661,6 +752,22 @@ pgarch_readyXlog(char *xlog) strcpy(xlog, basename); } } + + /* + * Found the oldest WAL, reset timeline ID and log segment number to generate + * the next WAL file in the sequence. 
+ */ + if (found && !historyFound) + { + XLogFromFileName(xlog, &curFileTLI, nextLogSegNo, wal_segment_size); + ereport(LOG, + (errmsg("directory scan to archive write-ahead log file \"%s\"", + xlog))); + + /* Disable full directory scan until there is a timeline switch */ + *dirScan = false; + } + FreeDir(rldir); return found; diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h index 1e47a14..e6d2e18 100644 --- a/src/include/postmaster/pgarch.h +++ b/src/include/postmaster/pgarch.h @@ -31,5 +31,6 @@ extern void PgArchShmemInit(void); extern bool PgArchCanRestart(void); extern void PgArchiverMain(void) pg_attribute_noreturn(); extern void PgArchWakeup(void); +extern void PgArchNotifyTLISwitch(void); #endif /* _PGARCH_H */ -- 1.8.3.1