On 2021/09/08 16:40, Kyotaro Horiguchi wrote:
No. The discussion taken there is not about permanently missing .ready files, but about .ready files created out-of-order. So I don't think the outcome from the thread does *fix* this issue.
Hmm...
I don't think we want such extent of perfectness at all for the case where some archive-related parameters are changed after a crash. Anyway we need to take a backup after that and at least all segments required for the backup will be properly archived. The segments up to the XLOG_SWITCH segment are harmless garbage, or a bit of food for disk.
So probably we reached the consensus that something like the attached patch (XLogArchiveCheckDone_walfile_xlog_switch.patch) is enough for the corner case where walreceiver fails to create .ready file of WAL file including XLOG_SWITCH?
Sounds convincing. Ok, I agree to that.
So barring any objection, I will commit the patch and back-patch it to all supported version. walreceiver_notify_archive_soon_v5.patch walreceiver_notify_archive_soon_v5_pg14-13.patch walreceiver_notify_archive_soon_v5_pg12-11.patch walreceiver_notify_archive_soon_v5_pg10.patch walreceiver_notify_archive_soon_v5_pg96.patch Regards, -- Fujii Masao Advanced Computing Technology Center Research and Development Headquarters NTT DATA CORPORATION
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index e51a7a749d..6046e24f0f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7392,6 +7392,27 @@ StartupXLOG(void) /* Handle interrupt signals of startup process */ HandleStartupProcInterrupts(); + /* + * In standby mode, create an archive notification file of a + * WAL segment if it includes an XLOG_SWITCH record and its + * notification file has not been created yet. This is + * necessary to handle the corner case that walreceiver may + * fail to create such notification file if it exits after it + * receives XLOG_SWITCH record but while it's receiving the + * remaining bytes in the segment. Without this handling, WAL + * archiving of the segment will be delayed until subsequent + * checkpoint creates its notification file when removing it + * even though it can be archived soon. + */ + if (StandbyMode && record->xl_rmid == RM_XLOG_ID && + (record->xl_info & ~XLR_INFO_MASK) == XLOG_SWITCH) + { + char xlogfilename[MAXFNAMELEN]; + + XLogFileName(xlogfilename, curFileTLI, readSegNo, wal_segment_size); + XLogArchiveCheckDone(xlogfilename); + } + /* * Pause WAL replay, if requested by a hot-standby session via * SetRecoveryPause().
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 75ec985953..2818bf5e25 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -118,6 +118,7 @@ static void WalRcvDie(int code, Datum arg); static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len); static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr); static void XLogWalRcvFlush(bool dying); +static void XLogWalRcvClose(XLogRecPtr recptr); static void XLogWalRcvSendReply(bool force, bool requestReply); static void XLogWalRcvSendHSFeedback(bool immed); static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime); @@ -920,46 +921,16 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) { int segbytes; - if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo)) - { - bool use_existent; - - /* - * fsync() and close current file before we switch to next one. We - * would otherwise have to reopen this file to fsync it later - */ - if (recvFile >= 0) - { - char xlogfname[MAXFNAMELEN]; + /* Close the current segment if it's completed */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo)) + XLogWalRcvClose(recptr); - XLogWalRcvFlush(false); - - /* - * XLOG segment files will be re-read by recovery in startup - * process soon, so we don't advise the OS to release cache - * pages associated with the file like XLogFileClose() does. - */ - if (close(recvFile) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close log segment %s: %m", - XLogFileNameP(recvFileTLI, recvSegNo)))); - - /* - * Create .done file forcibly to prevent the streamed segment - * from being archived later. - */ - XLogFileName(xlogfname, recvFileTLI, recvSegNo); - if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) - XLogArchiveForceDone(xlogfname); - else - XLogArchiveNotify(xlogfname); - } - recvFile = -1; + if (recvFile < 0) + { + bool use_existent = true; /* Create/use new log file */ XLByteToSeg(recptr, recvSegNo); - use_existent = true; recvFile = XLogFileInit(recvSegNo, &use_existent, true); recvFileTLI = ThisTimeLineID; recvOff = 0; @@ -1011,6 +982,15 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) LogstreamResult.Write = recptr; } + + /* + * Close the current segment if it's fully written up in the last cycle of + * the loop, to create its archive notification file soon. Otherwise WAL + * archiving of the segment will be delayed until any data in the next + * segment is received and written. + */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo)) + XLogWalRcvClose(recptr); } /* @@ -1065,6 +1045,52 @@ XLogWalRcvFlush(bool dying) } } +/* + * Close the current segment. + * + * Flush the segment to disk before closing it. Otherwise we have to + * reopen and fsync it later. + * + * Create an archive notification file since the segment is known completed. + */ +static void +XLogWalRcvClose(XLogRecPtr recptr) +{ + char xlogfname[MAXFNAMELEN]; + + Assert(recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo)); + + /* + * fsync() and close current file before we switch to next one. We would + * otherwise have to reopen this file to fsync it later + */ + XLogWalRcvFlush(false); + + XLogFileName(xlogfname, recvFileTLI, recvSegNo); + + /* + * XLOG segment files will be re-read by recovery in startup process soon, + * so we don't advise the OS to release cache pages associated with the + * file like XLogFileClose() does. + */ + if (close(recvFile) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close log segment %s: %m", + xlogfname))); + + /* + * Create .done file forcibly to prevent the streamed segment from being + * archived later. + */ + if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) + XLogArchiveForceDone(xlogfname); + else + XLogArchiveNotify(xlogfname); + + recvFile = -1; +} + /* * Send reply message to primary, indicating our current WAL locations, oldest * xmin and the current time.
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index c6cb90da81..441af76c0f 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -119,6 +119,7 @@ static void WalRcvDie(int code, Datum arg); static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len); static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr); static void XLogWalRcvFlush(bool dying); +static void XLogWalRcvClose(XLogRecPtr recptr); static void XLogWalRcvSendReply(bool force, bool requestReply); static void XLogWalRcvSendHSFeedback(bool immed); static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime); @@ -906,46 +907,16 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) { int segbytes; - if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) - { - bool use_existent; - - /* - * fsync() and close current file before we switch to next one. We - * would otherwise have to reopen this file to fsync it later - */ - if (recvFile >= 0) - { - char xlogfname[MAXFNAMELEN]; + /* Close the current segment if it's completed */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + XLogWalRcvClose(recptr); - XLogWalRcvFlush(false); - - /* - * XLOG segment files will be re-read by recovery in startup - * process soon, so we don't advise the OS to release cache - * pages associated with the file like XLogFileClose() does. - */ - if (close(recvFile) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close log segment %s: %m", - XLogFileNameP(recvFileTLI, recvSegNo)))); - - /* - * Create .done file forcibly to prevent the streamed segment - * from being archived later. - */ - XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); - if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) - XLogArchiveForceDone(xlogfname); - else - XLogArchiveNotify(xlogfname); - } - recvFile = -1; + if (recvFile < 0) + { + bool use_existent = true; /* Create/use new log file */ XLByteToSeg(recptr, recvSegNo, wal_segment_size); - use_existent = true; recvFile = XLogFileInit(recvSegNo, &use_existent, true); recvFileTLI = ThisTimeLineID; recvOff = 0; @@ -997,6 +968,15 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) LogstreamResult.Write = recptr; } + + /* + * Close the current segment if it's fully written up in the last cycle of + * the loop, to create its archive notification file soon. Otherwise WAL + * archiving of the segment will be delayed until any data in the next + * segment is received and written. + */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + XLogWalRcvClose(recptr); } /* @@ -1051,6 +1031,52 @@ XLogWalRcvFlush(bool dying) } } +/* + * Close the current segment. + * + * Flush the segment to disk before closing it. Otherwise we have to + * reopen and fsync it later. + * + * Create an archive notification file since the segment is known completed. + */ +static void +XLogWalRcvClose(XLogRecPtr recptr) +{ + char xlogfname[MAXFNAMELEN]; + + Assert(recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)); + + /* + * fsync() and close current file before we switch to next one. We would + * otherwise have to reopen this file to fsync it later + */ + XLogWalRcvFlush(false); + + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); + + /* + * XLOG segment files will be re-read by recovery in startup process soon, + * so we don't advise the OS to release cache pages associated with the + * file like XLogFileClose() does. + */ + if (close(recvFile) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close log segment %s: %m", + xlogfname))); + + /* + * Create .done file forcibly to prevent the streamed segment from being + * archived later. + */ + if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) + XLogArchiveForceDone(xlogfname); + else + XLogArchiveNotify(xlogfname); + + recvFile = -1; +} + /* * Send reply message to primary, indicating our current WAL locations, oldest * xmin and the current time.
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index faeea9f0cc..4831a259c4 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -125,6 +125,7 @@ static void WalRcvDie(int code, Datum arg); static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len); static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr); static void XLogWalRcvFlush(bool dying); +static void XLogWalRcvClose(XLogRecPtr recptr); static void XLogWalRcvSendReply(bool force, bool requestReply); static void XLogWalRcvSendHSFeedback(bool immed); static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime); @@ -883,47 +884,16 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) { int segbytes; - if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) - { - bool use_existent; - - /* - * fsync() and close current file before we switch to next one. We - * would otherwise have to reopen this file to fsync it later - */ - if (recvFile >= 0) - { - char xlogfname[MAXFNAMELEN]; - - XLogWalRcvFlush(false); - - XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); - - /* - * XLOG segment files will be re-read by recovery in startup - * process soon, so we don't advise the OS to release cache - * pages associated with the file like XLogFileClose() does. - */ - if (close(recvFile) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close log segment %s: %m", - xlogfname))); + /* Close the current segment if it's completed */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + XLogWalRcvClose(recptr); - /* - * Create .done file forcibly to prevent the streamed segment - * from being archived later. - */ - if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) - XLogArchiveForceDone(xlogfname); - else - XLogArchiveNotify(xlogfname); - } - recvFile = -1; + if (recvFile < 0) + { + bool use_existent = true; /* Create/use new log file */ XLByteToSeg(recptr, recvSegNo, wal_segment_size); - use_existent = true; recvFile = XLogFileInit(recvSegNo, &use_existent, true); recvFileTLI = ThisTimeLineID; } @@ -970,6 +940,15 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) /* Update shared-memory status */ pg_atomic_write_u64(&WalRcv->writtenUpto, LogstreamResult.Write); + + /* + * Close the current segment if it's fully written up in the last cycle of + * the loop, to create its archive notification file soon. Otherwise WAL + * archiving of the segment will be delayed until any data in the next + * segment is received and written. + */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + XLogWalRcvClose(recptr); } /* @@ -1023,6 +1002,52 @@ XLogWalRcvFlush(bool dying) } } +/* + * Close the current segment. + * + * Flush the segment to disk before closing it. Otherwise we have to + * reopen and fsync it later. + * + * Create an archive notification file since the segment is known completed. + */ +static void +XLogWalRcvClose(XLogRecPtr recptr) +{ + char xlogfname[MAXFNAMELEN]; + + Assert(recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)); + + /* + * fsync() and close current file before we switch to next one. We would + * otherwise have to reopen this file to fsync it later + */ + XLogWalRcvFlush(false); + + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); + + /* + * XLOG segment files will be re-read by recovery in startup process soon, + * so we don't advise the OS to release cache pages associated with the + * file like XLogFileClose() does. + */ + if (close(recvFile) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close log segment %s: %m", + xlogfname))); + + /* + * Create .done file forcibly to prevent the streamed segment from being + * archived later. + */ + if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) + XLogArchiveForceDone(xlogfname); + else + XLogArchiveNotify(xlogfname); + + recvFile = -1; +} + /* * Send reply message to primary, indicating our current WAL locations, oldest * xmin and the current time.
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 5c3a94cc3d..85b3f6def3 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -144,6 +144,7 @@ static void WalRcvDie(int code, Datum arg); static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len); static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr); static void XLogWalRcvFlush(bool dying); +static void XLogWalRcvClose(XLogRecPtr recptr); static void XLogWalRcvSendReply(bool force, bool requestReply); static void XLogWalRcvSendHSFeedback(bool immed); static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime); @@ -940,46 +941,16 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) { int segbytes; - if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo)) - { - bool use_existent; - - /* - * fsync() and close current file before we switch to next one. We - * would otherwise have to reopen this file to fsync it later - */ - if (recvFile >= 0) - { - char xlogfname[MAXFNAMELEN]; + /* Close the current segment if it's completed */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo)) + XLogWalRcvClose(recptr); - XLogWalRcvFlush(false); - - /* - * XLOG segment files will be re-read by recovery in startup - * process soon, so we don't advise the OS to release cache - * pages associated with the file like XLogFileClose() does. - */ - if (close(recvFile) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close log segment %s: %m", - XLogFileNameP(recvFileTLI, recvSegNo)))); - - /* - * Create .done file forcibly to prevent the streamed segment - * from being archived later. - */ - XLogFileName(xlogfname, recvFileTLI, recvSegNo); - if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) - XLogArchiveForceDone(xlogfname); - else - XLogArchiveNotify(xlogfname); - } - recvFile = -1; + if (recvFile < 0) + { + bool use_existent = true; /* Create/use new log file */ XLByteToSeg(recptr, recvSegNo); - use_existent = true; recvFile = XLogFileInit(recvSegNo, &use_existent, true); recvFileTLI = ThisTimeLineID; recvOff = 0; @@ -1031,6 +1002,15 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) LogstreamResult.Write = recptr; } + + /* + * Close the current segment if it's fully written up in the last cycle of + * the loop, to create its archive notification file soon. Otherwise WAL + * archiving of the segment will be delayed until any data in the next + * segment is received and written. + */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo)) + XLogWalRcvClose(recptr); } /* @@ -1085,6 +1065,52 @@ XLogWalRcvFlush(bool dying) } } +/* + * Close the current segment. + * + * Flush the segment to disk before closing it. Otherwise we have to + * reopen and fsync it later. + * + * Create an archive notification file since the segment is known completed. + */ +static void +XLogWalRcvClose(XLogRecPtr recptr) +{ + char xlogfname[MAXFNAMELEN]; + + Assert(recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo)); + + /* + * fsync() and close current file before we switch to next one. We would + * otherwise have to reopen this file to fsync it later + */ + XLogWalRcvFlush(false); + + XLogFileName(xlogfname, recvFileTLI, recvSegNo); + + /* + * XLOG segment files will be re-read by recovery in startup process soon, + * so we don't advise the OS to release cache pages associated with the + * file like XLogFileClose() does. + */ + if (close(recvFile) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close log segment %s: %m", + xlogfname))); + + /* + * Create .done file forcibly to prevent the streamed segment from being + * archived later. + */ + if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) + XLogArchiveForceDone(xlogfname); + else + XLogArchiveNotify(xlogfname); + + recvFile = -1; +} + /* * Send reply message to primary, indicating our current XLOG positions, oldest * xmin and the current time.
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 9a2bc37fd7..b90e5ca98e 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -125,6 +125,7 @@ static void WalRcvDie(int code, Datum arg); static void XLogWalRcvProcessMsg(unsigned char type, char *buf, Size len); static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr); static void XLogWalRcvFlush(bool dying); +static void XLogWalRcvClose(XLogRecPtr recptr); static void XLogWalRcvSendReply(bool force, bool requestReply); static void XLogWalRcvSendHSFeedback(bool immed); static void ProcessWalSndrMessage(XLogRecPtr walEnd, TimestampTz sendTime); @@ -883,42 +884,12 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) { int segbytes; - if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + /* Close the current segment if it's completed */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + XLogWalRcvClose(recptr); + + if (recvFile < 0) { - /* - * fsync() and close current file before we switch to next one. We - * would otherwise have to reopen this file to fsync it later - */ - if (recvFile >= 0) - { - char xlogfname[MAXFNAMELEN]; - - XLogWalRcvFlush(false); - - XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); - - /* - * XLOG segment files will be re-read by recovery in startup - * process soon, so we don't advise the OS to release cache - * pages associated with the file like XLogFileClose() does. - */ - if (close(recvFile) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not close log segment %s: %m", - xlogfname))); - - /* - * Create .done file forcibly to prevent the streamed segment - * from being archived later. - */ - if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) - XLogArchiveForceDone(xlogfname); - else - XLogArchiveNotify(xlogfname); - } - recvFile = -1; - /* Create/use new log file */ XLByteToSeg(recptr, recvSegNo, wal_segment_size); recvFile = XLogFileInit(recvSegNo); @@ -967,6 +938,15 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) /* Update shared-memory status */ pg_atomic_write_u64(&WalRcv->writtenUpto, LogstreamResult.Write); + + /* + * Close the current segment if it's fully written up in the last cycle of + * the loop, to create its archive notification file soon. Otherwise WAL + * archiving of the segment will be delayed until any data in the next + * segment is received and written. + */ + if (recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + XLogWalRcvClose(recptr); } /* @@ -1020,6 +1000,52 @@ XLogWalRcvFlush(bool dying) } } +/* + * Close the current segment. + * + * Flush the segment to disk before closing it. Otherwise we have to + * reopen and fsync it later. + * + * Create an archive notification file since the segment is known completed. + */ +static void +XLogWalRcvClose(XLogRecPtr recptr) +{ + char xlogfname[MAXFNAMELEN]; + + Assert(recvFile >= 0 && !XLByteInSeg(recptr, recvSegNo, wal_segment_size)); + + /* + * fsync() and close current file before we switch to next one. We would + * otherwise have to reopen this file to fsync it later + */ + XLogWalRcvFlush(false); + + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); + + /* + * XLOG segment files will be re-read by recovery in startup process soon, + * so we don't advise the OS to release cache pages associated with the + * file like XLogFileClose() does. + */ + if (close(recvFile) != 0) + ereport(PANIC, + (errcode_for_file_access(), + errmsg("could not close log segment %s: %m", + xlogfname))); + + /* + * Create .done file forcibly to prevent the streamed segment from being + * archived later. + */ + if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) + XLogArchiveForceDone(xlogfname); + else + XLogArchiveNotify(xlogfname); + + recvFile = -1; +} + /* * Send reply message to primary, indicating our current WAL locations, oldest * xmin and the current time.