At Thu, 03 Jun 2021 21:52:08 +0900 (JST), Kyotaro Horiguchi 
<horikyota....@gmail.com> wrote in 
> 
> https://www.postgresql.org/docs/14/continuous-archiving.html
> > The archive command should generally be designed to refuse to
> > overwrite any pre-existing archive file. This is an important safety
> > feature to preserve the integrity of your archive in case of
> > administrator error (such as sending the output of two different
> > servers to the same archive directory).
> 
> I'm not sure how we should treat this.  Since the archive must store
> the files that were actually applied to the server data, the fact that
> a file has already been archived cannot by itself be a reason to skip
> archiving it.  We need to make sure the new file is byte-identical to
> the already-archived version.  We could compare just the *restored*
> file to the same file in pg_wal, but that might be too much of a
> penalty for the benefit.  (See the second attached file.)

(To recap: in a replication setup that uses WAL archiving, the startup
process tries to restore WAL files from the archive before checking the
pg_wal directory for the desired file.  That behavior is intentional
and reasonable.  However, the restore code notifies the archiver of a
restored file regardless of whether the file has already been archived.
If archive_command is written to return an error rather than overwrite
an existing file, as the documentation suggests, that behavior leads to
a permanent archive failure.)
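
For example, with a duplicate-refusing archive_command along the lines
of the example on that documentation page (the path below is the
documentation's placeholder):

    archive_command = 'test ! -f /mnt/server/archivedir/%f && cp %p /mnt/server/archivedir/%f'

a standby running with archive_mode = always re-notifies a segment it
has just restored from that same archive; the command then exits
nonzero and the archiver keeps retrying that segment without making
progress.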

After playing with this, I can reproduce the problem just by restarting
a standby, even in a simple archive-replication setup, with no special
prerequisites.  So I think this is worth fixing.

With this patch, KeepFileRestoredFromArchive compares the contents of
the just-restored file and the existing file for the same segment only
when:

     - archive_mode = always
 and - the file to restore already exists in pg_wal
 and - it has a .done and/or .ready status file,

which is not the usual case.  The function then skips the archive
notification if the contents are identical.  The included TAP test
passes on both Linux and Windows.
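
For reference, a condensed sketch of the new logic (this is not the
patch itself; restored_file_is_identical() is only a placeholder for
the block-by-block read/memcmp loop the patch adds to xlogarchive.c):

    /* In KeepFileRestoredFromArchive(), before the old file is removed */
    if (XLogArchiveMode == ARCHIVE_MODE_ALWAYS &&
        XLogArchiveIsReadyOrDone(xlogfname))
        already_archived = restored_file_is_identical(path, xlogfpath);

    ...

    /* later, where the archiver would be notified */
    if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
        XLogArchiveForceDone(xlogfname);
    else if (!already_archived)
        XLogArchiveNotify(xlogfname);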


regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl
index 777bf05274..ed5e80327b 100644
--- a/src/test/recovery/t/020_archive_status.pl
+++ b/src/test/recovery/t/020_archive_status.pl
@@ -8,7 +8,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 16;
+use Test::More tests => 17;
 use Config;
 
 my $primary = get_new_node('primary');
@@ -176,6 +176,7 @@ ok( -f "$standby1_data/$segment_path_2_done",
 # will cause archiving failures.
 my $standby2 = get_new_node('standby2');
 $standby2->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby2->enable_archiving;
 $standby2->append_conf('postgresql.conf', 'archive_mode = always');
 my $standby2_data = $standby2->data_dir;
 $standby2->start;
@@ -234,3 +235,49 @@ ok( -f "$standby2_data/$segment_path_1_done"
 	  && -f "$standby2_data/$segment_path_2_done",
 	".done files created after archive success with archive_mode=always on standby"
 );
+
+
+# Check that restarting an archive-replicated standby doesn't archive
+# the same file twice.
+
+# Use an archive_command that refuses to overwrite an existing file
+my $archpath = TestLib::perl2host($standby2->archive_dir);
+$archpath =~ s{\\}{\\\\}g if ($TestLib::windows_os);
+my $nodup_command =
+	  $TestLib::windows_os
+	  ? qq{if not exist "$archpath\\\\%f" (copy "%p" "$archpath\\\\%f") else (exit 1)}
+	  : qq{test ! -f "$archpath/%f" && cp "%p" "$archpath/%f"};
+$standby2->safe_psql(
+	'postgres', qq{
+	ALTER SYSTEM SET archive_command TO '$nodup_command';
+	SELECT pg_reload_conf();
+});
+
+# Remember the current segment file.  The +1 below is an adjustment to
+# avoid segment borders.
+my $cursegfile = $primary->safe_psql(
+	'postgres',
+	'SELECT pg_walfile_name(pg_current_wal_lsn() + 1)');
+$primary->psql('postgres', 'CHECKPOINT; SELECT pg_switch_wal();');
+$standby2->psql('postgres', 'CHECKPOINT');
+$standby2->poll_query_until(
+	'postgres',
+	"SELECT last_archived_wal = '$cursegfile' FROM pg_stat_archiver")
+  or die "Timed out waiting for the segment to be archived";
+
+$standby2->restart;
+
+$primary->psql('postgres', 'CHECKPOINT; SELECT pg_switch_wal();');
+
+$standby2->poll_query_until(
+	'postgres', qq[
+	SELECT last_archived_wal IS NOT NULL OR
+           last_failed_wal IS NOT NULL
+	FROM pg_stat_archiver]);
+
+my $result =
+  $standby2->safe_psql(
+	  'postgres',
+	  "SELECT last_archived_wal, last_failed_wal FROM pg_stat_archiver");
+
+ok($result =~ /^[^|]+\|$/, 'archiving proceeds without failures after restart');
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 26b023e754..7d94e7f963 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -382,6 +382,7 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
 {
 	char		xlogfpath[MAXPGPATH];
 	bool		reload = false;
+	bool		already_archived = false;
 	struct stat statbuf;
 
 	snprintf(xlogfpath, MAXPGPATH, XLOGDIR "/%s", xlogfname);
@@ -416,6 +417,68 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
 		/* same-size buffers, so this never truncates */
 		strlcpy(oldpath, xlogfpath, MAXPGPATH);
 #endif
+		/*
+		 * On a standby with archive_mode=always, the same file can be
+		 * archived more than once.  If the archive_command rejects
+		 * overwriting, WAL archiving never gets past that file.  Avoid a
+		 * duplicate archiving attempt when a file with the same content is
+		 * known to have already been archived or notified.
+		 */
+		if (XLogArchiveMode == ARCHIVE_MODE_ALWAYS &&
+			XLogArchiveIsReadyOrDone(xlogfname))
+		{
+			int fd1;
+			int fd2 = -1;
+
+			fd1 = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+			if (fd1 >= 0)
+				fd2 = BasicOpenFile(oldpath, O_RDONLY | PG_BINARY);
+
+			if (fd1 < 0 || fd2 < 0)
+			{
+				ereport(WARNING,
+						(errcode_for_file_access(),
+						 errmsg("could not open file \"%s\", skipping duplicate check: %m",
+								fd1 < 0 ? path : oldpath)));
+				if (fd1 >= 0)
+					close(fd1);
+			}
+			else
+			{
+				unsigned char srcbuf[XLOG_BLCKSZ];
+				unsigned char dstbuf[XLOG_BLCKSZ];
+				uint32 i;
+
+				/*
+				 * Compare the two files' contents.  We don't bother
+				 * completing the comparison if anything goes wrong.
+				 */
+				for (i = 0; i < wal_segment_size / XLOG_BLCKSZ; i++)
+				{
+					if (read(fd1, srcbuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+						break;
+
+					if (read(fd2, dstbuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+						break;
+
+					if (memcmp(srcbuf, dstbuf, XLOG_BLCKSZ) != 0)
+						break;
+				}
+
+				close(fd1);
+				close(fd2);
+
+				if (i == wal_segment_size / XLOG_BLCKSZ)
+				{
+					already_archived = true;
+
+					ereport(LOG,
+							(errmsg("log file \"%s\" has already been archived, skipping archiving",
+									xlogfname)));
+				}
+			}
+		}
+
 		if (unlink(oldpath) != 0)
 			ereport(FATAL,
 					(errcode_for_file_access(),
@@ -432,7 +495,7 @@ KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
 	 */
 	if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
 		XLogArchiveForceDone(xlogfname);
-	else
+	else if (!already_archived)
 		XLogArchiveNotify(xlogfname);
 
 	/*
