Thanks for the feedback. > + StatusFilePath(archiveStatusPath, xlog, ".ready"); > + if (stat(archiveStatusPath, &stat_buf) == 0) > + PgArchEnableDirScan();
> We may want to call PgArchWakeup() after setting the flag. Yes, I have added a call to wake up the archiver. > > + * - The next anticipated log segment is not available. > > > > I wonder if we really need to perform a directory scan in this case. > > Unless there are other cases where the .ready files are created out of > > order, I think this just causes an unnecessary directory scan every > > time the archiver catches up. > Thinking further, I suppose this is necessary for when lastSegNo gets > reset after processing an out-of-order .ready file. Also, this is necessary when lastTLI gets reset after switching to a new timeline. > + pg_atomic_flag dirScan; > I personally don't think it's necessary to use an atomic here. A > spinlock or LWLock would probably work just fine, as contention seems > unlikely. If we use a lock, we also don't have to worry about memory > barriers. A history file should be archived as soon as it is created. The atomic flag here makes sure that there is no reordering of read/write instructions while accessing the flag in shared memory. The archiver needs to read this flag at the beginning of each cycle. Writes to the atomic flag are synchronized, and it provides a lockless read. I think an atomic flag is an efficient choice here, unless I am missing something. Please find attached patch v7. Thanks, Dipesh
From 55c42f851176a75881a55b1c75d624248169b876 Mon Sep 17 00:00:00 2001 From: Dipesh Pandit <dipesh.pan...@enterprisedb.com> Date: Wed, 30 Jun 2021 14:05:58 +0530 Subject: [PATCH] mitigate directory scan for WAL archiver WAL archiver scans the status directory to identify the next WAL file that needs to be archived. This directory scan can be minimised by maintaining the log segment number of current file which is being archived and incrementing it by '1' to get the next WAL file. Archiver can check the availability of next file and in case if the file is not available then it should fall-back to directory scan to get the oldest WAL file. If there is a timeline switch then archiver performs a full directory scan to make sure that archiving history file takes precedence over archiving WAL files on older timeline. --- src/backend/access/transam/xlog.c | 15 +++ src/backend/access/transam/xlogarchive.c | 12 +++ src/backend/postmaster/pgarch.c | 163 ++++++++++++++++++++++++++++--- src/include/postmaster/pgarch.h | 1 + 4 files changed, 178 insertions(+), 13 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f84c0bb..088ab43 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -50,6 +50,7 @@ #include "port/atomics.h" #include "port/pg_iovec.h" #include "postmaster/bgwriter.h" +#include "postmaster/pgarch.h" #include "postmaster/startup.h" #include "postmaster/walwriter.h" #include "replication/basebackup.h" @@ -7555,6 +7556,13 @@ StartupXLOG(void) */ if (AllowCascadeReplication()) WalSndWakeup(); + + /* + * Switched to a new timeline, notify archiver to enable + * directory scan. + */ + if (XLogArchivingActive()) + PgArchEnableDirScan(); } /* Exit loop if we reached inclusive recovery target */ @@ -7797,6 +7805,13 @@ StartupXLOG(void) EndRecPtr, reason); /* + * Switched to a new timeline, notify archiver to enable directory + * scan. 
+ */ + if (XLogArchivingActive()) + PgArchEnableDirScan(); + + /* * Since there might be a partial WAL segment named RECOVERYXLOG, get * rid of it. */ diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c index 26b023e..94c74f8 100644 --- a/src/backend/access/transam/xlogarchive.c +++ b/src/backend/access/transam/xlogarchive.c @@ -609,6 +609,18 @@ XLogArchiveCheckDone(const char *xlog) /* Retry creation of the .ready file */ XLogArchiveNotify(xlog); + + /* + * This .ready file is created out of order, notify archiver to perform + * a full directory scan to archive corresponding WAL file. + */ + StatusFilePath(archiveStatusPath, xlog, ".ready"); + if (stat(archiveStatusPath, &stat_buf) == 0) + { + PgArchEnableDirScan(); + PgArchWakeup(); + } + return false; } diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 74a7d7c..e5ea7a6 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -76,8 +76,23 @@ typedef struct PgArchData { int pgprocno; /* pgprocno of archiver process */ + + /* + * Flag to enable/disable directory scan. If this flag is set then it + * forces archiver to perform a full directory scan to get the next log + * segment. 
+ */ + pg_atomic_flag dirScan; } PgArchData; +/* + * Segment number and timeline ID to identify the next file in a WAL sequence + */ +typedef struct readyXLogState +{ + XLogSegNo lastSegNo; + TimeLineID lastTLI; +} readyXLogState; /* ---------- * Local data @@ -97,12 +112,13 @@ static volatile sig_atomic_t ready_to_stop = false; */ static void pgarch_waken_stop(SIGNAL_ARGS); static void pgarch_MainLoop(void); -static void pgarch_ArchiverCopyLoop(void); +static void pgarch_ArchiverCopyLoop(readyXLogState *xlogState); static bool pgarch_archiveXlog(char *xlog); -static bool pgarch_readyXlog(char *xlog); +static bool pgarch_readyXlog(char *xlog, readyXLogState *xlogState); static void pgarch_archiveDone(char *xlog); static void pgarch_die(int code, Datum arg); static void HandlePgArchInterrupts(void); +static void PgArchDisableDirScan(void); /* Report shared memory space needed by PgArchShmemInit */ Size @@ -129,6 +145,7 @@ PgArchShmemInit(void) /* First time through, so initialize */ MemSet(PgArch, 0, PgArchShmemSize()); PgArch->pgprocno = INVALID_PGPROCNO; + pg_atomic_init_flag(&PgArch->dirScan); } } @@ -221,6 +238,24 @@ PgArchWakeup(void) SetLatch(&ProcGlobal->allProcs[arch_pgprocno].procLatch); } +/* + * Set dirScan flag in shared memory. Backend notifies archiver in case if an + * action requires full directory scan to get the next log segment. + */ +void +PgArchEnableDirScan(void) +{ + pg_atomic_test_set_flag(&PgArch->dirScan); +} + +/* + * Reset dirScan flag in shared memory. + */ +static void +PgArchDisableDirScan(void) +{ + pg_atomic_clear_flag(&PgArch->dirScan); +} /* SIGUSR2 signal handler for archiver process */ static void @@ -243,10 +278,21 @@ pgarch_waken_stop(SIGNAL_ARGS) static void pgarch_MainLoop(void) { + readyXLogState xlogState; pg_time_t last_copy_time = 0; bool time_to_stop; /* + * Initialize xlogState, segment number and TLI will be reset/updated in + * function pgarch_readyXlog() for each cycle. 
+ */ + xlogState.lastSegNo = 0; + xlogState.lastTLI = 0; + + /* First cycle after startup */ + PgArchEnableDirScan(); + + /* * There shouldn't be anything for the archiver to do except to wait for a * signal ... however, the archiver exists to protect our data, so she * wakes up occasionally to allow herself to be proactive. @@ -280,7 +326,7 @@ pgarch_MainLoop(void) } /* Do what we're here for */ - pgarch_ArchiverCopyLoop(); + pgarch_ArchiverCopyLoop(&xlogState); last_copy_time = time(NULL); /* @@ -321,7 +367,7 @@ pgarch_MainLoop(void) * Archives all outstanding xlogs then returns */ static void -pgarch_ArchiverCopyLoop(void) +pgarch_ArchiverCopyLoop(readyXLogState *xlogState) { char xlog[MAX_XFN_CHARS + 1]; @@ -331,7 +377,7 @@ pgarch_ArchiverCopyLoop(void) * some backend will add files onto the list of those that need archiving * while we are still copying earlier archives */ - while (pgarch_readyXlog(xlog)) + while (pgarch_readyXlog(xlog, xlogState)) { int failures = 0; int failures_orphan = 0; @@ -596,29 +642,99 @@ pgarch_archiveXlog(char *xlog) * larger ID; the net result being that past timelines are given higher * priority for archiving. This seems okay, or at least not obviously worth * changing. + * + * WAL files are generated in a specific order of log segment number. The + * directory scan for each WAL file can be minimised by identifying the next + * WAL file in the sequence. This can be achieved by maintaining log segment + * number and timeline ID corresponding to WAL file currently being archived. + * The log segment number of current WAL file can be incremented by '1' to + * point to the next WAL file in a sequence. Full directory scan can be avoided + * by checking the availability of next WAL file. "xlogState" specifies the + * segment number and timeline ID corresponding to the next WAL file. 
+ * + * However, a full directory scan is performed in some special cases where it + * requires us to archive files which takes precedence over the next anticipated + * log segment. For example, history file takes precedence over archiving WAL + * files on older timeline or an older WAL file which is being left out because + * corresponding .ready file is created out of order. + * + * Returns "true" if a segment is ready for archival, "xlog" represents the + * name of the segment. */ static bool -pgarch_readyXlog(char *xlog) +pgarch_readyXlog(char *xlog, readyXLogState *xlogState) { - /* - * open xlog status directory and read through list of xlogs that have the - * .ready suffix, looking for earliest file. It is possible to optimise - * this code, though only a single file is expected on the vast majority - * of calls, so.... - */ + char basename[MAX_XFN_CHARS + 1]; + char xlogready[MAXPGPATH]; char XLogArchiveStatusDir[MAXPGPATH]; DIR *rldir; struct dirent *rlde; + struct stat st; bool found = false; bool historyFound = false; + /* + * Skip directory scan until it is not indicated by shared memory flag + * dirScan. + */ + if (pg_atomic_unlocked_test_flag(&PgArch->dirScan)) + { + /* + * We already have the next anticipated log segment and timeline, check + * if this WAL is ready to be archived. + */ + XLogFileName(basename, xlogState->lastTLI, xlogState->lastSegNo, wal_segment_size); + StatusFilePath(xlogready, basename, ".ready"); + + if (stat(xlogready, &st) == 0) + { + strcpy(xlog, basename); + + /* + * Increment the readyXLogState's lastSegNo to point to the next + * WAL file. Although we have not yet archived the current WAL file + * and readyXLogState points to the next WAL file, this is safe + * because the next cycle will not begin until we finish archiving + * current WAL file. 
+ */ + xlogState->lastSegNo++; + return true; + } + } + + /* + * This is a fall-back path, check if we are here due to the unavailability + * of next anticipated log segment or the archiver is being forced to + * perform a full directory scan. Reset the flag in shared memory only if + * it has been enabled to force a full directory scan. + */ + if (!pg_atomic_unlocked_test_flag(&PgArch->dirScan)) + PgArchDisableDirScan(); + + /* + * Perform a full directory scan to identify the next log segment. There + * may be one of the following scenarios which may require us to perform a + * full directory scan. + * + * - This is the first cycle since archiver has started and there is no + * idea about the next anticipated log segment. + * + * - There is a timeline switch, archive history file as part of this + * timeline switch. + * + * - .ready file is created out of order. + * + * - The next anticipated log segment is not available. + * + * open xlog status directory and read through list of xlogs that have the + * .ready suffix, looking for earliest file. + */ snprintf(XLogArchiveStatusDir, MAXPGPATH, XLOGDIR "/archive_status"); rldir = AllocateDir(XLogArchiveStatusDir); while ((rlde = ReadDir(rldir, XLogArchiveStatusDir)) != NULL) { int basenamelen = (int) strlen(rlde->d_name) - 6; - char basename[MAX_XFN_CHARS + 1]; bool ishistory; /* Ignore entries with unexpected number of characters */ @@ -661,6 +777,27 @@ pgarch_readyXlog(char *xlog) strcpy(xlog, basename); } } + + if (found) + { + if (!historyFound) + { + /* + * Reset segment number and timeline ID as this is the beginning of a + * new sequence. 
+ */ + XLogFromFileName(xlog, &xlogState->lastTLI, &xlogState->lastSegNo, + wal_segment_size); + + /* Increment log segment number to point to the next WAL file */ + xlogState->lastSegNo++; + } + + ereport(LOG, + (errmsg("directory scan to archive write-ahead log file \"%s\"", + xlog))); + } + FreeDir(rldir); return found; diff --git a/src/include/postmaster/pgarch.h b/src/include/postmaster/pgarch.h index 1e47a14..265f7e5 100644 --- a/src/include/postmaster/pgarch.h +++ b/src/include/postmaster/pgarch.h @@ -31,5 +31,6 @@ extern void PgArchShmemInit(void); extern bool PgArchCanRestart(void); extern void PgArchiverMain(void) pg_attribute_noreturn(); extern void PgArchWakeup(void); +extern void PgArchEnableDirScan(void); #endif /* _PGARCH_H */ -- 1.8.3.1