On Tue, Feb 02, 2021 at 07:14:16AM -0800, Noah Misch wrote:
> Recycling and preallocation are wasteful during archive recovery, because
> KeepFileRestoredFromArchive() unlinks every entry in its path.  I propose to
> fix the race by adding an XLogCtl flag indicating which regime currently owns
> the right to add long-term pg_wal directory entries.  In the archive recovery
> regime, the checkpointer will not preallocate and will unlink old segments
> instead of recycling them (like wal_recycle=off).  XLogFileInit() will fail.

Here's the implementation.  Patches 1-4 suffice to stop the user-visible
ERROR.  Patch 5 avoids a spurious LOG-level message and wasted filesystem
writes, and it provides some future-proofing.

I was tempted to (but did not) just remove preallocation.  Creating one file
per checkpoint seems tiny relative to the max_wal_size=1GB default, so I
expect it's hard to isolate any benefit.  Under the old checkpoint_segments=3
default, a preallocated segment covered a respectable third of the next
checkpoint.  Before commit 63653f7 (2002), preallocation created more files.
Author:     Noah Misch <n...@leadboat.com>
Commit:     Noah Misch <n...@leadboat.com>

    Remove XLogFileInit() ability to skip ControlFileLock.
    
    Cold paths, initdb and end-of-recovery, used it.  Don't optimize them.
    
    Discussion: https://postgr.es/m/20210202151416.gb3304...@rfd.leadboat.com

diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 1b3a3d9..39a38ba 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -913,8 +913,7 @@ static void AdvanceXLInsertBuffer(XLogRecPtr upto, bool 
opportunistic);
 static bool XLogCheckpointNeeded(XLogSegNo new_segno);
 static void XLogWrite(XLogwrtRqst WriteRqst, bool flexible);
 static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
-                                                                  bool 
find_free, XLogSegNo max_segno,
-                                                                  bool 
use_lock);
+                                                                  bool 
find_free, XLogSegNo max_segno);
 static int     XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
                                                 XLogSource source, bool 
notfoundOk);
 static int     XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource 
source);
@@ -2492,7 +2491,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 
                        /* create/use new log file */
                        use_existent = true;
-                       openLogFile = XLogFileInit(openLogSegNo, &use_existent, 
true);
+                       openLogFile = XLogFileInit(openLogSegNo, &use_existent);
                        ReserveExternalFD();
                }
 
@@ -3265,10 +3264,6 @@ XLogNeedsFlush(XLogRecPtr record)
  * pre-existing file will be deleted).  On return, true if a pre-existing
  * file was used.
  *
- * use_lock: if true, acquire ControlFileLock while moving file into
- * place.  This should be true except during bootstrap log creation.  The
- * caller must *not* hold the lock at call.
- *
  * Returns FD of opened file.
  *
  * Note: errors here are ERROR not PANIC because we might or might not be
@@ -3277,7 +3272,7 @@ XLogNeedsFlush(XLogRecPtr record)
  * in a critical section.
  */
 int
-XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
+XLogFileInit(XLogSegNo logsegno, bool *use_existent)
 {
        char            path[MAXPGPATH];
        char            tmppath[MAXPGPATH];
@@ -3437,8 +3432,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool 
use_lock)
         */
        max_segno = logsegno + CheckPointSegments;
        if (!InstallXLogFileSegment(&installed_segno, tmppath,
-                                                               *use_existent, 
max_segno,
-                                                               use_lock))
+                                                               *use_existent, 
max_segno))
        {
                /*
                 * No need for any more future segments, or 
InstallXLogFileSegment()
@@ -3592,7 +3586,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, 
XLogSegNo srcsegno,
        /*
         * Now move the segment into place with its final name.
         */
-       if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, false))
+       if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0))
                elog(ERROR, "InstallXLogFileSegment should not have failed");
 }
 
@@ -3616,29 +3610,20 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, 
XLogSegNo srcsegno,
  * free slot is found between *segno and max_segno. (Ignored when find_free
  * is false.)
  *
- * use_lock: if true, acquire ControlFileLock while moving file into
- * place.  This should be true except during bootstrap log creation.  The
- * caller must *not* hold the lock at call.
- *
  * Returns true if the file was installed successfully.  false indicates that
  * max_segno limit was exceeded, or an error occurred while renaming the
  * file into place.
  */
 static bool
 InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
-                                          bool find_free, XLogSegNo max_segno,
-                                          bool use_lock)
+                                          bool find_free, XLogSegNo max_segno)
 {
        char            path[MAXPGPATH];
        struct stat stat_buf;
 
        XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
 
-       /*
-        * We want to be sure that only one process does this at a time.
-        */
-       if (use_lock)
-               LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
 
        if (!find_free)
        {
@@ -3653,8 +3638,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
                        if ((*segno) >= max_segno)
                        {
                                /* Failed to find a free slot within specified 
range */
-                               if (use_lock)
-                                       LWLockRelease(ControlFileLock);
+                               LWLockRelease(ControlFileLock);
                                return false;
                        }
                        (*segno)++;
@@ -3668,14 +3652,12 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
         */
        if (durable_rename_excl(tmppath, path, LOG) != 0)
        {
-               if (use_lock)
-                       LWLockRelease(ControlFileLock);
+               LWLockRelease(ControlFileLock);
                /* durable_rename_excl already emitted log message */
                return false;
        }
 
-       if (use_lock)
-               LWLockRelease(ControlFileLock);
+       LWLockRelease(ControlFileLock);
 
        return true;
 }
@@ -3946,7 +3928,7 @@ PreallocXlogFiles(XLogRecPtr endptr)
        {
                _logSegNo++;
                use_existent = true;
-               lf = XLogFileInit(_logSegNo, &use_existent, true);
+               lf = XLogFileInit(_logSegNo, &use_existent);
                close(lf);
                if (!use_existent)
                        CheckpointStats.ckpt_segs_added++;
@@ -4223,7 +4205,7 @@ RemoveXlogFile(const char *segname, XLogSegNo 
recycleSegNo,
                *endlogSegNo <= recycleSegNo &&
                lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
                InstallXLogFileSegment(endlogSegNo, path,
-                                                          true, recycleSegNo, 
true))
+                                                          true, recycleSegNo))
        {
                ereport(DEBUG2,
                                (errmsg_internal("recycled write-ahead log file 
\"%s\"",
@@ -5341,7 +5323,7 @@ BootStrapXLOG(void)
 
        /* Create first XLOG segment file */
        use_existent = false;
-       openLogFile = XLogFileInit(1, &use_existent, false);
+       openLogFile = XLogFileInit(1, &use_existent);
 
        /*
         * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5650,7 +5632,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr 
endOfLog)
                bool            use_existent = true;
                int                     fd;
 
-               fd = XLogFileInit(startLogSegNo, &use_existent, true);
+               fd = XLogFileInit(startLogSegNo, &use_existent);
 
                if (close(fd) != 0)
                {
diff --git a/src/backend/replication/walreceiver.c 
b/src/backend/replication/walreceiver.c
index faeea9f..b00066e 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -924,7 +924,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
                        /* Create/use new log file */
                        XLByteToSeg(recptr, recvSegNo, wal_segment_size);
                        use_existent = true;
-                       recvFile = XLogFileInit(recvSegNo, &use_existent, true);
+                       recvFile = XLogFileInit(recvSegNo, &use_existent);
                        recvFileTLI = ThisTimeLineID;
                }
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 77187c1..a175649 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -296,7 +296,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData 
*rdata,
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
-extern int     XLogFileInit(XLogSegNo segno, bool *use_existent, bool 
use_lock);
+extern int     XLogFileInit(XLogSegNo segno, bool *use_existent);
 extern int     XLogFileOpen(XLogSegNo segno);
 
 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
Author:     Noah Misch <n...@leadboat.com>
Commit:     Noah Misch <n...@leadboat.com>

    In XLogFileInit(), fix *use_existent postcondition to suit callers.
    
    Infrequently, the mismatch caused log_checkpoints messages and
    TRACE_POSTGRESQL_CHECKPOINT_DONE() to witness an "added" count too high
    by one.  Since that consequence is so minor, no back-patch.
    
    Discussion: https://postgr.es/m/20210202151416.gb3304...@rfd.leadboat.com

diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 39a38ba..073dabc 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3261,8 +3261,8 @@ XLogNeedsFlush(XLogRecPtr record)
  * logsegno: identify segment to be created/opened.
  *
  * *use_existent: if true, OK to use a pre-existing file (else, any
- * pre-existing file will be deleted).  On return, true if a pre-existing
- * file was used.
+ * pre-existing file will be deleted).  On return, false iff this call added
+ * some segment on disk.
  *
  * Returns FD of opened file.
  *
@@ -3431,8 +3431,10 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent)
         * CheckPointSegments.
         */
        max_segno = logsegno + CheckPointSegments;
-       if (!InstallXLogFileSegment(&installed_segno, tmppath,
-                                                               *use_existent, 
max_segno))
+       if (InstallXLogFileSegment(&installed_segno, tmppath,
+                                                          *use_existent, 
max_segno))
+               *use_existent = false;
+       else
        {
                /*
                 * No need for any more future segments, or 
InstallXLogFileSegment()
@@ -3442,9 +3444,6 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent)
                unlink(tmppath);
        }
 
-       /* Set flag to tell caller there was no existent file */
-       *use_existent = false;
-
        /* Now open original target segment (might not be file I just made) */
        fd = BasicOpenFile(path, O_RDWR | PG_BINARY | 
get_sync_bit(sync_method));
        if (fd < 0)
Author:     Noah Misch <n...@leadboat.com>
Commit:     Noah Misch <n...@leadboat.com>

    Remove XLogFileInit() ability to unlink a pre-existing file.
    
    Only initdb used it.  initdb refuses to operate on a non-empty directory
    and generally does not cope with pre-existing files of other kinds.
    Hence, use the opportunity to simplify.
    
    Discussion: https://postgr.es/m/20210202151416.gb3304...@rfd.leadboat.com

diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 073dabc..1a31788 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2424,7 +2424,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
        bool            ispartialpage;
        bool            last_iteration;
        bool            finishing_seg;
-       bool            use_existent;
+       bool            added;
        int                     curridx;
        int                     npages;
        int                     startidx;
@@ -2490,8 +2490,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                                                        wal_segment_size);
 
                        /* create/use new log file */
-                       use_existent = true;
-                       openLogFile = XLogFileInit(openLogSegNo, &use_existent);
+                       openLogFile = XLogFileInit(openLogSegNo, &added);
                        ReserveExternalFD();
                }
 
@@ -3260,9 +3259,7 @@ XLogNeedsFlush(XLogRecPtr record)
  *
  * logsegno: identify segment to be created/opened.
  *
- * *use_existent: if true, OK to use a pre-existing file (else, any
- * pre-existing file will be deleted).  On return, false iff this call added
- * some segment on disk.
+ * *added: on return, true if this call raised the number of extant segments.
  *
  * Returns FD of opened file.
  *
@@ -3272,7 +3269,7 @@ XLogNeedsFlush(XLogRecPtr record)
  * in a critical section.
  */
 int
-XLogFileInit(XLogSegNo logsegno, bool *use_existent)
+XLogFileInit(XLogSegNo logsegno, bool *added)
 {
        char            path[MAXPGPATH];
        char            tmppath[MAXPGPATH];
@@ -3287,19 +3284,17 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent)
        /*
         * Try to use existent file (checkpoint maker may have created it 
already)
         */
-       if (*use_existent)
+       *added = false;
+       fd = BasicOpenFile(path, O_RDWR | PG_BINARY | 
get_sync_bit(sync_method));
+       if (fd < 0)
        {
-               fd = BasicOpenFile(path, O_RDWR | PG_BINARY | 
get_sync_bit(sync_method));
-               if (fd < 0)
-               {
-                       if (errno != ENOENT)
-                               ereport(ERROR,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not open file 
\"%s\": %m", path)));
-               }
-               else
-                       return fd;
+               if (errno != ENOENT)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not open file \"%s\": 
%m", path)));
        }
+       else
+               return fd;
 
        /*
         * Initialize an empty (all zeroes) segment.  NOTE: it is possible that
@@ -3412,12 +3407,9 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent)
                                 errmsg("could not close file \"%s\": %m", 
tmppath)));
 
        /*
-        * Now move the segment into place with its final name.
-        *
-        * If caller didn't want to use a pre-existing file, get rid of any
-        * pre-existing file.  Otherwise, cope with possibility that someone 
else
-        * has created the file while we were filling ours: if so, use ours to
-        * pre-create a future log segment.
+        * Now move the segment into place with its final name.  Cope with
+        * possibility that someone else has created the file while we were
+        * filling ours: if so, use ours to pre-create a future log segment.
         */
        installed_segno = logsegno;
 
@@ -3431,9 +3423,8 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent)
         * CheckPointSegments.
         */
        max_segno = logsegno + CheckPointSegments;
-       if (InstallXLogFileSegment(&installed_segno, tmppath,
-                                                          *use_existent, 
max_segno))
-               *use_existent = false;
+       if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno))
+               *added = true;
        else
        {
                /*
@@ -3918,7 +3909,7 @@ PreallocXlogFiles(XLogRecPtr endptr)
 {
        XLogSegNo       _logSegNo;
        int                     lf;
-       bool            use_existent;
+       bool            added;
        uint64          offset;
 
        XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
@@ -3926,10 +3917,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
        if (offset >= (uint32) (0.75 * wal_segment_size))
        {
                _logSegNo++;
-               use_existent = true;
-               lf = XLogFileInit(_logSegNo, &use_existent);
+               lf = XLogFileInit(_logSegNo, &added);
                close(lf);
-               if (!use_existent)
+               if (added)
                        CheckpointStats.ckpt_segs_added++;
        }
 }
@@ -5224,7 +5214,7 @@ BootStrapXLOG(void)
        XLogLongPageHeader longpage;
        XLogRecord *record;
        char       *recptr;
-       bool            use_existent;
+       bool            added;
        uint64          sysidentifier;
        struct timeval tv;
        pg_crc32c       crc;
@@ -5321,8 +5311,7 @@ BootStrapXLOG(void)
        record->xl_crc = crc;
 
        /* Create first XLOG segment file */
-       use_existent = false;
-       openLogFile = XLogFileInit(1, &use_existent);
+       openLogFile = XLogFileInit(1, &added);
 
        /*
         * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5628,10 +5617,10 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr 
endOfLog)
                 * The switch happened at a segment boundary, so just create 
the next
                 * segment on the new timeline.
                 */
-               bool            use_existent = true;
+               bool            added;
                int                     fd;
 
-               fd = XLogFileInit(startLogSegNo, &use_existent);
+               fd = XLogFileInit(startLogSegNo, &added);
 
                if (close(fd) != 0)
                {
diff --git a/src/backend/replication/walreceiver.c 
b/src/backend/replication/walreceiver.c
index b00066e..eadff8f 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -885,7 +885,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 
                if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, 
wal_segment_size))
                {
-                       bool            use_existent;
+                       bool            added;
 
                        /*
                         * fsync() and close current file before we switch to 
next one. We
@@ -923,8 +923,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 
                        /* Create/use new log file */
                        XLByteToSeg(recptr, recvSegNo, wal_segment_size);
-                       use_existent = true;
-                       recvFile = XLogFileInit(recvSegNo, &use_existent);
+                       recvFile = XLogFileInit(recvSegNo, &added);
                        recvFileTLI = ThisTimeLineID;
                }
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index a175649..d8b8f59 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -296,7 +296,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData 
*rdata,
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
-extern int     XLogFileInit(XLogSegNo segno, bool *use_existent);
+extern int     XLogFileInit(XLogSegNo segno, bool *added);
 extern int     XLogFileOpen(XLogSegNo segno);
 
 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
Author:     Noah Misch <n...@leadboat.com>
Commit:     Noah Misch <n...@leadboat.com>

    Don't ERROR on PreallocXlogFiles() race condition.
    
    Before a restartpoint finishes PreallocXlogFiles(), a startup process
    KeepFileRestoredFromArchive() call can unlink the preallocated segment.
    If a CHECKPOINT sql command had elicited the restartpoint experiencing
    the race condition, that sql command failed.  Moreover, the restartpoint
    omitted its log_checkpoints message and some inessential resource
    reclamation.  Prevent the ERROR by skipping open() of the segment.
    Since these consequences are so minor, no back-patch.
    
    Discussion: https://postgr.es/m/20210202151416.gb3304...@rfd.leadboat.com

diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 1a31788..8cda20d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2424,7 +2424,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
        bool            ispartialpage;
        bool            last_iteration;
        bool            finishing_seg;
-       bool            added;
        int                     curridx;
        int                     npages;
        int                     startidx;
@@ -2490,7 +2489,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
                                                        wal_segment_size);
 
                        /* create/use new log file */
-                       openLogFile = XLogFileInit(openLogSegNo, &added);
+                       openLogFile = XLogFileInit(openLogSegNo);
                        ReserveExternalFD();
                }
 
@@ -3255,23 +3254,21 @@ XLogNeedsFlush(XLogRecPtr record)
 }
 
 /*
- * Create a new XLOG file segment, or open a pre-existing one.
+ * Try to make a given XLOG file segment exist.
  *
- * logsegno: identify segment to be created/opened.
+ * logsegno: identify segment.
  *
  * *added: on return, true if this call raised the number of extant segments.
  *
- * Returns FD of opened file.
+ * path: on return, this char[MAXPGPATH] has the path to the logsegno file.
  *
- * Note: errors here are ERROR not PANIC because we might or might not be
- * inside a critical section (eg, during checkpoint there is no reason to
- * take down the system on failure).  They will promote to PANIC if we are
- * in a critical section.
+ * Returns -1 or FD of opened file.  A -1 here is not an error; a caller
+ * wanting an open segment should attempt to open "path", which usually will
+ * succeed.  (This is weird, but it's efficient for the callers.)
  */
-int
-XLogFileInit(XLogSegNo logsegno, bool *added)
+static int
+XLogFileInitInternal(XLogSegNo logsegno, bool *added, char *path)
 {
-       char            path[MAXPGPATH];
        char            tmppath[MAXPGPATH];
        PGAlignedXLogBlock zbuffer;
        XLogSegNo       installed_segno;
@@ -3424,26 +3421,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
         */
        max_segno = logsegno + CheckPointSegments;
        if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno))
+       {
                *added = true;
+               elog(DEBUG2, "done creating and filling new WAL file");
+       }
        else
        {
                /*
                 * No need for any more future segments, or 
InstallXLogFileSegment()
-                * failed to rename the file into place. If the rename failed, 
opening
-                * the file below will fail.
+                * failed to rename the file into place. If the rename failed, a
+                * caller opening the file may fail.
                 */
                unlink(tmppath);
+               elog(DEBUG2, "abandoned new WAL file");
        }
 
+       return -1;
+}
+
+/*
+ * Create a new XLOG file segment, or open a pre-existing one.
+ *
+ * logsegno: identify segment to be created/opened.
+ *
+ * Returns FD of opened file.
+ *
+ * Note: errors here are ERROR not PANIC because we might or might not be
+ * inside a critical section (eg, during checkpoint there is no reason to
+ * take down the system on failure).  They will promote to PANIC if we are
+ * in a critical section.
+ */
+int
+XLogFileInit(XLogSegNo logsegno)
+{
+       bool            ignore_added;
+       char            path[MAXPGPATH];
+       int                     fd;
+
+       fd = XLogFileInitInternal(logsegno, &ignore_added, path);
+       if (fd >= 0)
+               return fd;
+
        /* Now open original target segment (might not be file I just made) */
        fd = BasicOpenFile(path, O_RDWR | PG_BINARY | 
get_sync_bit(sync_method));
        if (fd < 0)
                ereport(ERROR,
                                (errcode_for_file_access(),
                                 errmsg("could not open file \"%s\": %m", 
path)));
-
-       elog(DEBUG2, "done creating and filling new WAL file");
-
        return fd;
 }
 
@@ -3903,6 +3927,15 @@ XLogFileClose(void)
  * High-volume systems will be OK once they've built up a sufficient set of
  * recycled log segments, but the startup transient is likely to include
  * a lot of segment creations by foreground processes, which is not so good.
+ *
+ * XLogFileInitInternal() can ereport(ERROR).  All known causes indicate big
+ * trouble; for example, a full filesystem is one cause.  The checkpoint WAL
+ * and/or ControlFile updates already completed.  If a RequestCheckpoint()
+ * initiated the present checkpoint and an ERROR ends this function, the
+ * command that called RequestCheckpoint() fails.  That's not ideal, but it's
+ * not worth contorting more functions to use caller-specified elevel values.
+ * (With or without RequestCheckpoint(), an ERROR forestalls some inessential
+ * reporting and resource reclamation.)
  */
 static void
 PreallocXlogFiles(XLogRecPtr endptr)
@@ -3910,6 +3943,7 @@ PreallocXlogFiles(XLogRecPtr endptr)
        XLogSegNo       _logSegNo;
        int                     lf;
        bool            added;
+       char            path[MAXPGPATH];
        uint64          offset;
 
        XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
@@ -3917,8 +3951,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
        if (offset >= (uint32) (0.75 * wal_segment_size))
        {
                _logSegNo++;
-               lf = XLogFileInit(_logSegNo, &added);
-               close(lf);
+               lf = XLogFileInitInternal(_logSegNo, &added, path);
+               if (lf >= 0)
+                       close(lf);
                if (added)
                        CheckpointStats.ckpt_segs_added++;
        }
@@ -5214,7 +5249,6 @@ BootStrapXLOG(void)
        XLogLongPageHeader longpage;
        XLogRecord *record;
        char       *recptr;
-       bool            added;
        uint64          sysidentifier;
        struct timeval tv;
        pg_crc32c       crc;
@@ -5311,7 +5345,7 @@ BootStrapXLOG(void)
        record->xl_crc = crc;
 
        /* Create first XLOG segment file */
-       openLogFile = XLogFileInit(1, &added);
+       openLogFile = XLogFileInit(1);
 
        /*
         * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
@@ -5617,10 +5651,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr 
endOfLog)
                 * The switch happened at a segment boundary, so just create 
the next
                 * segment on the new timeline.
                 */
-               bool            added;
                int                     fd;
 
-               fd = XLogFileInit(startLogSegNo, &added);
+               fd = XLogFileInit(startLogSegNo);
 
                if (close(fd) != 0)
                {
diff --git a/src/backend/replication/walreceiver.c 
b/src/backend/replication/walreceiver.c
index eadff8f..2be9ad9 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -885,8 +885,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 
                if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, 
wal_segment_size))
                {
-                       bool            added;
-
                        /*
                         * fsync() and close current file before we switch to 
next one. We
                         * would otherwise have to reopen this file to fsync it 
later
@@ -923,7 +921,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 
                        /* Create/use new log file */
                        XLByteToSeg(recptr, recvSegNo, wal_segment_size);
-                       recvFile = XLogFileInit(recvSegNo, &added);
+                       recvFile = XLogFileInit(recvSegNo);
                        recvFileTLI = ThisTimeLineID;
                }
 
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d8b8f59..7510e88 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -296,7 +296,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData 
*rdata,
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
-extern int     XLogFileInit(XLogSegNo segno, bool *added);
+extern int     XLogFileInit(XLogSegNo segno);
 extern int     XLogFileOpen(XLogSegNo segno);
 
 extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
Author:     Noah Misch <n...@leadboat.com>
Commit:     Noah Misch <n...@leadboat.com>

    Skip WAL recycling and preallocation during archive recovery.
    
    The previous commit addressed the chief consequences of a race condition
    between InstallXLogFileSegment() and KeepFileRestoredFromArchive().  Fix
    three lesser consequences.  A spurious durable_rename_excl() LOG message
    remained possible.  KeepFileRestoredFromArchive() wasted the proceeds of
    WAL recycling and preallocation.  Finally, XLogFileInitInternal() could
    return a descriptor for a file that KeepFileRestoredFromArchive() had
    already unlinked.  That felt like a recipe for future bugs.
    
    Discussion: https://postgr.es/m/20210202151416.gb3304...@rfd.leadboat.com

diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 8cda20d..2c6e21b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -663,6 +663,16 @@ typedef struct XLogCtlData
        bool            SharedHotStandbyActive;
 
        /*
+        * InstallXLogFileSegmentActive indicates whether the checkpointer 
should
+        * arrange for future segments by recycling and/or PreallocXlogFiles().
+        * Protected by ControlFileLock.  Only the startup process changes it.  
If
+        * true, anyone can use InstallXLogFileSegment().  If false, the startup
+        * process owns the exclusive right to install segments, by reading from
+        * the archive and possibly replacing existing files.
+        */
+       bool            InstallXLogFileSegmentActive;
+
+       /*
         * SharedPromoteIsTriggered indicates if a standby promotion has been
         * triggered.  Protected by info_lck.
         */
@@ -921,6 +931,7 @@ static int  XLogPageRead(XLogReaderState *xlogreader, 
XLogRecPtr targetPagePtr,
                                                 int reqLen, XLogRecPtr 
targetRecPtr, char *readBuf);
 static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
                                                                                
bool fetching_ckpt, XLogRecPtr tliRecPtr);
+static void XLogShutdownWalRcv(void);
 static int     emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
 static void XLogFileClose(void);
 static void PreallocXlogFiles(XLogRecPtr endptr);
@@ -3625,8 +3636,8 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, 
XLogSegNo srcsegno,
  * is false.)
  *
  * Returns true if the file was installed successfully.  false indicates that
- * max_segno limit was exceeded, or an error occurred while renaming the
- * file into place.
+ * max_segno limit was exceeded, the startup process has disabled this
+ * function for now, or an error occurred while renaming the file into place.
  */
 static bool
 InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
@@ -3638,6 +3649,11 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
        XLogFilePath(path, ThisTimeLineID, *segno, wal_segment_size);
 
        LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       if (!XLogCtl->InstallXLogFileSegmentActive)
+       {
+               LWLockRelease(ControlFileLock);
+               return false;
+       }
 
        if (!find_free)
        {
@@ -3745,6 +3761,7 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
         */
        if (source == XLOG_FROM_ARCHIVE)
        {
+               Assert(!XLogCtl->InstallXLogFileSegmentActive);
                KeepFileRestoredFromArchive(path, xlogfname);
 
                /*
@@ -3946,6 +3963,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
        char            path[MAXPGPATH];
        uint64          offset;
 
+       if (!XLogCtl->InstallXLogFileSegmentActive)
+               return;                                 /* unlocked check says 
no */
+
        XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
        offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
        if (offset >= (uint32) (0.75 * wal_segment_size))
@@ -4227,6 +4247,7 @@ RemoveXlogFile(const char *segname, XLogSegNo 
recycleSegNo,
         */
        if (wal_recycle &&
                *endlogSegNo <= recycleSegNo &&
+               XLogCtl->InstallXLogFileSegmentActive &&        /* callee 
rechecks this */
                lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
                InstallXLogFileSegment(endlogSegNo, path,
                                                           true, recycleSegNo))
@@ -4240,7 +4261,7 @@ RemoveXlogFile(const char *segname, XLogSegNo 
recycleSegNo,
        }
        else
        {
-               /* No need for any more future segments... */
+               /* No need for any more future segments, or recycling failed 
... */
                int                     rc;
 
                ereport(DEBUG2,
@@ -5226,6 +5247,7 @@ XLOGShmemInit(void)
        XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
        XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
        XLogCtl->SharedHotStandbyActive = false;
+       XLogCtl->InstallXLogFileSegmentActive = false;
        XLogCtl->SharedPromoteIsTriggered = false;
        XLogCtl->WalWriterSleeping = false;
 
@@ -5253,6 +5275,11 @@ BootStrapXLOG(void)
        struct timeval tv;
        pg_crc32c       crc;
 
+       /* allow ordinary WAL segment creation, like StartupXLOG() would */
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       XLogCtl->InstallXLogFileSegmentActive = true;
+       LWLockRelease(ControlFileLock);
+
        /*
         * Select a hopefully-unique system identifier code for this 
installation.
         * We use the result of gettimeofday(), including the fractional seconds
@@ -7619,7 +7646,7 @@ StartupXLOG(void)
         * the startup checkpoint record. It will trump over the checkpoint and
         * subsequent records if it's still alive when we start writing WAL.
         */
-       ShutdownWalRcv();
+       XLogShutdownWalRcv();
 
        /*
         * Reset unlogged relations to the contents of their INIT fork. This is
@@ -7644,7 +7671,7 @@ StartupXLOG(void)
         * recovery, e.g., timeline history file) from archive or pg_wal.
         *
         * Note that standby mode must be turned off after killing WAL receiver,
-        * i.e., calling ShutdownWalRcv().
+        * i.e., calling XLogShutdownWalRcv().
         */
        Assert(!WalRcvStreaming());
        StandbyMode = false;
@@ -7710,6 +7737,14 @@ StartupXLOG(void)
        oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
 
        /*
+        * Allow ordinary WAL segment creation before any exitArchiveRecovery(),
+        * which sometimes creates a segment, and after the last ReadRecord().
+        */
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       XLogCtl->InstallXLogFileSegmentActive = true;
+       LWLockRelease(ControlFileLock);
+
+       /*
         * Consider whether we need to assign a new timeline ID.
         *
         * If we are doing an archive recovery, we always assign a new ID.  This
@@ -12378,7 +12413,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool 
randAccess,
                                         */
                                        if (StandbyMode && 
CheckForStandbyTrigger())
                                        {
-                                               ShutdownWalRcv();
+                                               XLogShutdownWalRcv();
                                                return false;
                                        }
 
@@ -12426,7 +12461,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool 
randAccess,
                                         * WAL that we restore from archive.
                                         */
                                        if (WalRcvStreaming())
-                                               ShutdownWalRcv();
+                                               XLogShutdownWalRcv();
 
                                        /*
                                         * Before we sleep, re-scan for 
possible new timelines if
@@ -12553,7 +12588,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool 
randAccess,
                                         */
                                        if (pendingWalRcvRestart && 
!startWalReceiver)
                                        {
-                                               ShutdownWalRcv();
+                                               XLogShutdownWalRcv();
 
                                                /*
                                                 * Re-scan for possible new 
timelines if we were
@@ -12603,6 +12638,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool 
randAccess,
                                                                         tli, 
curFileTLI);
                                                }
                                                curFileTLI = tli;
+                                               LWLockAcquire(ControlFileLock, 
LW_EXCLUSIVE);
+                                               
XLogCtl->InstallXLogFileSegmentActive = true;
+                                               LWLockRelease(ControlFileLock);
                                                RequestXLogStreaming(tli, ptr, 
PrimaryConnInfo,
                                                                                
         PrimarySlotName,
                                                                                
         wal_receiver_create_temp_slot);
@@ -12770,6 +12808,17 @@ StartupRequestWalReceiverRestart(void)
        }
 }
 
+/* Thin wrapper around ShutdownWalRcv(). */
+static void
+XLogShutdownWalRcv(void)
+{
+       ShutdownWalRcv();
+
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       XLogCtl->InstallXLogFileSegmentActive = false;
+       LWLockRelease(ControlFileLock);
+}
+
 /*
  * Determine what log level should be used to report a corrupt WAL record
  * in the current WAL page, previously read by XLogPageRead().

Reply via email to