On Sat, Sep 29, 2018 at 04:58:57PM +0900, Michael Paquier wrote:
> Actually, what you are proposing here sounds much better to me.  That's
> in the area of what has been done recently with RemoveTempXlogFiles() in
> 5fc1008e.  Any objections to doing something like that? 

Okay.  I have hacked up a patch based on Stephen's idea; it is attached.
Any opinions?
--
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7375a78ffc..fe41400a7a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6456,6 +6456,11 @@ StartupXLOG(void)
 	 * used when creating a new segment, so perform some clean up to not
 	 * bloat this path.  This is done first as there is no point to sync this
 	 * temporary data.
+	 *	- The pg_wal/archive_status directory may still include .ready or
+	 * .done files which refer to already-removed WAL segments, as segments
+	 * are recycled or removed before their corresponding archive status
+	 * files are themselves removed.  This is also done before syncing the
+	 * data directory.
 	 *	- There might be data which we had written, intending to fsync it,
 	 * but which we had not actually fsync'd yet. Therefore, a power failure
 	 * in the near future might cause earlier unflushed writes to be lost,
@@ -6467,6 +6472,7 @@ StartupXLOG(void)
 		ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
 	{
 		RemoveTempXlogFiles();
+		XLogArchiveCleanStatus();
 		SyncDataDirectory();
 	}
 
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index d40317168e..67818638d5 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -652,6 +652,56 @@ XLogArchiveCheckDone(const char *xlog)
 	return false;
 }
 
+/*
+ * XLogArchiveCleanStatus
+ *
+ * Remove .ready and .done files in pg_wal/archive_status which refer to
+ * non-existing WAL segments.  When a segment is removed or recycled at
+ * checkpoint or restart point, its archive status files are removed only
+ * afterwards, so a crash in-between may leave some of them behind,
+ * confusing the archiver.  A failure to remove a file is only logged.
+ *
+ * This is called at the beginning of recovery after a previous crash where
+ * no other processes write WAL data.
+ */
+void
+XLogArchiveCleanStatus(void)
+{
+	DIR		   *xldir;
+	struct dirent *xlde;
+
+	elog(DEBUG2, "removing archive status referring to missing WAL segments");
+
+	xldir = AllocateDir(XLOGDIR "/archive_status");
+
+	while ((xlde = ReadDir(xldir, XLOGDIR "/archive_status")) != NULL)
+	{
+		char		path[MAXPGPATH];
+		char		xlogname[XLOG_FNAME_LEN + 1];
+		struct stat stat_buf;
+
+		/* Only .ready and .done files of WAL segments are of interest */
+		if (!StatusFileIsDone(xlde->d_name) &&
+			!StatusFileIsReady(xlde->d_name))
+			continue;
+
+		memcpy(xlogname, xlde->d_name, XLOG_FNAME_LEN);
+		xlogname[XLOG_FNAME_LEN] = '\0';
+		snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogname);
+
+		/* Keep the status file unless its segment is known to be missing */
+		if (stat(path, &stat_buf) == 0 || errno != ENOENT)
+			continue;
+
+		snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s", xlde->d_name);
+		if (unlink(path) == 0)
+			elog(DEBUG2, "removed archive status file \"%s\"", path);
+		else
+			elog(LOG, "could not remove file \"%s\": %m", path);
+	}
+	FreeDir(xldir);
+}
+
 /*
  * XLogArchiveIsBusy
  *
@@ -760,6 +810,13 @@ XLogArchiveCleanup(const char *xlog)
 {
 	char		archiveStatusPath[MAXPGPATH];
 
+	/*
+	 * durable_unlink() is not used here.  This routine is called after a
+	 * segment has been removed or renamed, and even if the system crashes
+	 * in-between, status files referring to missing WAL segments are
+	 * automatically removed at the beginning of recovery.
+	 */
+
 	/* Remove the .done file */
 	StatusFilePath(archiveStatusPath, xlog, ".done");
 	unlink(archiveStatusPath);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index 30610b3ea9..4c8d7dae62 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -201,6 +201,16 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
 #define StatusFilePath(path, xlog, suffix)	\
 	snprintf(path, MAXPGPATH, XLOGDIR "/archive_status/%s%s", xlog, suffix)
 
+#define StatusFileIsReady(fname) \
+	(strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN &&		\
+	 strlen(fname) == XLOG_FNAME_LEN + (sizeof(".ready") - 1) &&	\
+	 strcmp(&(fname)[XLOG_FNAME_LEN], ".ready") == 0)
+
+#define StatusFileIsDone(fname) \
+	(strspn(fname, "0123456789ABCDEF") == XLOG_FNAME_LEN &&		\
+	 strlen(fname) == XLOG_FNAME_LEN + (sizeof(".done") - 1) &&	\
+	 strcmp(&(fname)[XLOG_FNAME_LEN], ".done") == 0)
+
 #define BackupHistoryFileName(fname, tli, logSegNo, startpoint, wal_segsz_bytes) \
 	snprintf(fname, MAXFNAMELEN, "%08X%08X%08X.%08X.backup", tli, \
 			 (uint32) ((logSegNo) / XLogSegmentsPerXLogId(wal_segsz_bytes)), \
@@ -328,6 +338,7 @@ extern void XLogArchiveNotify(const char *xlog);
 extern void XLogArchiveNotifySeg(XLogSegNo segno);
 extern void XLogArchiveForceDone(const char *xlog);
 extern bool XLogArchiveCheckDone(const char *xlog);
+extern void XLogArchiveCleanStatus(void);
 extern bool XLogArchiveIsBusy(const char *xlog);
 extern bool XLogArchiveIsReady(const char *xlog);
 extern bool XLogArchiveIsReadyOrDone(const char *xlog);

Attachment: signature.asc
Description: PGP signature

Reply via email to