On 2021/09/03 14:56, Kyotaro Horiguchi wrote:
+                               if (readSource == XLOG_FROM_STREAM &&
+                                       record->xl_rmid == RM_XLOG_ID &&
+                                       (record->xl_info & ~XLR_INFO_MASK) == 
XLOG_SWITCH)

readSource is the source at the time startup reads it and it could be
different from the source at the time the record was written. We
cannot know where the record came from there, so does the readSource
condition work as expected?  If we had some trouble streaming just
before, readSource at the time is likely to be XLOG_FROM_PG_WAL.

Yes.


+                                               if (XLogArchivingAlways())
+                                                       
XLogArchiveNotify(xlogfilename, true);
+                                               else
+                                                       
XLogArchiveForceDone(xlogfilename);

The path is used both for crash and archive recovery. If we pass there
while crash recovery on a primary with archive_mode=on, the file could
be marked .done before actually archived. On the other hand when
archive_mode=always, the file could be re-marked .ready even after it
has been already archived.  Why isn't it XLogArchiveCheckDone?

Yeah, you're right. ISTM what we should do is to just call
XLogArchiveCheckDone() for the segment including XLOG_SWITCH record,
i.e., to create .ready file if the segment has no archive notification file yet
and archive_mode is set to 'always'. Even if we don't do this when we reach
XLOG_SWITCH record, subsequent restartpoints eventually will call
XLogArchiveCheckDone() for such segments.

One issue of this approach is that the WAL segment including XLOG_SWITCH
record may be archived before its previous segments are. That is,
the notification file of current segment is created when it's replayed
because it includes XLOG_SWIATCH, but the notification files of
its previous segments will be created by subsequent restartpoints
because they don't have XLOG_SWITCH. Probably we should avoid this?

If yes, one approach for this issue is to call XLogArchiveCheckDone() for
not only the segment including XLOG_SWITCH but also all the segments
older than that. Thought?


Anyway, I extracted the changes in walreceiver from the patch,
because it's self-contained and can be applied separately.
Patch attached.

Regards,

--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
diff --git a/src/backend/replication/walreceiver.c 
b/src/backend/replication/walreceiver.c
index 9a2bc37fd7..eb9d12adc1 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -125,6 +125,7 @@ static void WalRcvDie(int code, Datum arg);
 static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len);
 static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr);
 static void XLogWalRcvFlush(bool dying);
+static void XLogWalRcvClose(XLogRecPtr recptr);
 static void XLogWalRcvSendReply(bool force, bool requestReply);
 static void XLogWalRcvSendHSFeedback(bool immed);
 static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime);
@@ -883,42 +884,11 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
        {
                int                     segbytes;
 
-               if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, 
wal_segment_size))
+               /* Close the current segment if it's completed */
+               XLogWalRcvClose(recptr);
+
+               if (recvFile < 0)
                {
-                       /*
-                        * fsync() and close current file before we switch to 
next one. We
-                        * would otherwise have to reopen this file to fsync it 
later
-                        */
-                       if (recvFile >= 0)
-                       {
-                               char            xlogfname[MAXFNAMELEN];
-
-                               XLogWalRcvFlush(false);
-
-                               XLogFileName(xlogfname, recvFileTLI, recvSegNo, 
wal_segment_size);
-
-                               /*
-                                * XLOG segment files will be re-read by 
recovery in startup
-                                * process soon, so we don't advise the OS to 
release cache
-                                * pages associated with the file like 
XLogFileClose() does.
-                                */
-                               if (close(recvFile) != 0)
-                                       ereport(PANIC,
-                                                       
(errcode_for_file_access(),
-                                                        errmsg("could not 
close log segment %s: %m",
-                                                                       
xlogfname)));
-
-                               /*
-                                * Create .done file forcibly to prevent the 
streamed segment
-                                * from being archived later.
-                                */
-                               if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
-                                       XLogArchiveForceDone(xlogfname);
-                               else
-                                       XLogArchiveNotify(xlogfname);
-                       }
-                       recvFile = -1;
-
                        /* Create/use new log file */
                        XLByteToSeg(recptr, recvSegNo, wal_segment_size);
                        recvFile = XLogFileInit(recvSegNo);
@@ -967,6 +937,14 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 
        /* Update shared-memory status */
        pg_atomic_write_u64(&WalRcv->writtenUpto, LogstreamResult.Write);
+
+       /*
+        * Close the current segment if it's fully written up in the last cycle 
of
+        * the loop, to create its archive notification file soon. Otherwise WAL
+        * archiving of the segment will be delayed until any data in the next
+        * segment is received and written.
+        */
+       XLogWalRcvClose(recptr);
 }
 
 /*
@@ -1020,6 +998,53 @@ XLogWalRcvFlush(bool dying)
        }
 }
 
+/*
+ * Close the current segment if it's completed.
+ *
+ * Flush the segment to disk before closing it. Otherwise we have to
+ * reopen and fsync it later.
+ *
+ * Create an archive notification file since the segment is known completed.
+ */
+static void
+XLogWalRcvClose(XLogRecPtr recptr)
+{
+       if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size))
+       {
+               char            xlogfname[MAXFNAMELEN];
+
+               /*
+                * fsync() and close current file before we switch to next one. 
We
+                * would otherwise have to reopen this file to fsync it later
+                */
+               XLogWalRcvFlush(false);
+
+               XLogFileName(xlogfname, recvFileTLI, recvSegNo, 
wal_segment_size);
+
+               /*
+                * XLOG segment files will be re-read by recovery in startup 
process
+                * soon, so we don't advise the OS to release cache pages 
associated
+                * with the file like XLogFileClose() does.
+                */
+               if (close(recvFile) != 0)
+                       ereport(PANIC,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not close log segment 
%s: %m",
+                                                       xlogfname)));
+
+               /*
+                * Create .done file forcibly to prevent the streamed segment 
from
+                * being archived later.
+                */
+               if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
+                       XLogArchiveForceDone(xlogfname);
+               else
+                       XLogArchiveNotify(xlogfname);
+
+               recvFile = -1;
+       }
+}
+
 /*
  * Send reply message to primary, indicating our current WAL locations, oldest
  * xmin and the current time.

Reply via email to