From 3de0c93d385c7127b87b5b9e3a9a9eb1c5aad237 Mon Sep 17 00:00:00 2001
From: John Hsu <johnyvr@gmail.com>
Date: Tue, 1 Jul 2025 18:05:37 +0000
Subject: [PATCH] Avoid copying WAL segments before divergence to speed up
 pg_rewind

Adds a conditional check to avoid unnecessarily copying any
WAL segment files from source to target if they are common
between both servers before the point of WAL divergence
during pg_rewind. On the source server, each WAL file's.
All WAL files that exist on source and target, which fall
before the segment of the first diverged LSN can safely be
skipped from copying to the target.
---
 src/bin/pg_rewind/filemap.c                   | 77 +++++++++++++++++--
 src/bin/pg_rewind/filemap.h                   | 12 ++-
 src/bin/pg_rewind/pg_rewind.c                 |  6 +-
 src/bin/pg_rewind/t/008_min_recovery_point.pl | 18 ++++-
 4 files changed, 102 insertions(+), 11 deletions(-)

diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index c933871ca9f..74800568be5 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -199,6 +199,28 @@ filehash_init(void)
 	filehash = filehash_create(FILEHASH_INITIAL_SIZE, NULL);
 }
 
+/* Determine the type of file content (relation, WAL, or other) */
+static file_content_type_t
+getFileType(const char *path)
+{
+	if (isRelDataFile(path))
+		return FILE_CONTENT_TYPE_RELATION;
+	else
+	{
+		/* Extract the filename from the path */
+		const char *filename = last_dir_separator(path);
+		if (filename == NULL)
+			filename = path;
+		else
+			filename++;  /* Skip the separator */
+
+		if (IsXLogFileName(filename))
+			return FILE_CONTENT_TYPE_WAL;
+	}
+
+	return FILE_CONTENT_TYPE_OTHER;
+}
+
 /* Look up entry for 'path', creating a new one if it doesn't exist */
 static file_entry_t *
 insert_filehash_entry(const char *path)
@@ -210,7 +232,7 @@ insert_filehash_entry(const char *path)
 	if (!found)
 	{
 		entry->path = pg_strdup(path);
-		entry->isrelfile = isRelDataFile(path);
+		entry->content_type = getFileType(path);
 
 		entry->target_exists = false;
 		entry->target_type = FILE_TYPE_UNDEFINED;
@@ -383,7 +405,7 @@ process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
 	 */
 	if (entry)
 	{
-		Assert(entry->isrelfile);
+		Assert(entry->content_type == FILE_CONTENT_TYPE_RELATION);
 
 		if (entry->target_exists)
 		{
@@ -693,11 +715,43 @@ final_filemap_cmp(const void *a, const void *b)
 		return strcmp(fa->path, fb->path);
 }
 
+/*
+ * Decide what to do with a WAL segment file based on its position
+ * relative to the point of divergence.
+ * Caller is responsible for ensuring the file exists on both
+ * source and target.
+ */
+static file_action_t
+decide_wal_file_action(const char *fname, XLogSegNo last_common_segno, int wal_segsz_bytes)
+{
+	TimeLineID  file_tli;
+	XLogSegNo   file_segno;
+
+	/* Get current WAL segment number given current segment file name */
+	XLogFromFileName(fname, &file_tli, &file_segno, wal_segsz_bytes);
+
+	/*
+	 * Avoid copying files before the last common segment.
+	 *
+	 * These files are assumed to exist on source and target.
+	 * Only WAL segment files after the last common segment number on
+	 * the new source need to be copied to the new target.
+	 */
+	if (file_segno < last_common_segno)
+	{
+		pg_log_debug("WAL file entry \"%s\" not copied to target", fname);
+		return FILE_ACTION_NONE;
+	}
+
+	pg_log_debug("WAL file entry \"%s\" is copied to target", fname);
+	return FILE_ACTION_COPY;
+}
+
 /*
  * Decide what action to perform to a file.
  */
 static file_action_t
-decide_file_action(file_entry_t *entry)
+decide_file_action(file_entry_t *entry, XLogSegNo last_common_segno, int wal_segsz_bytes)
 {
 	const char *path = entry->path;
 
@@ -799,7 +853,18 @@ decide_file_action(file_entry_t *entry)
 			return FILE_ACTION_NONE;
 
 		case FILE_TYPE_REGULAR:
-			if (!entry->isrelfile)
+			if (entry->content_type == FILE_CONTENT_TYPE_WAL)
+			{
+				/* Handle WAL segment file */
+				const char *filename = last_dir_separator(entry->path);
+				if (filename == NULL)
+					filename = entry->path;
+				else
+					filename++;  /* Skip the separator */
+
+				return decide_wal_file_action(filename, last_common_segno, wal_segsz_bytes);
+			}
+			else if (entry->content_type != FILE_CONTENT_TYPE_RELATION)
 			{
 				/*
 				 * It's a non-data file that we have no special processing
@@ -858,7 +923,7 @@ decide_file_action(file_entry_t *entry)
  * should be executed.
  */
 filemap_t *
-decide_file_actions(void)
+decide_file_actions(XLogSegNo last_common_segno, int wal_segsz_bytes)
 {
 	int			i;
 	filehash_iterator it;
@@ -868,7 +933,7 @@ decide_file_actions(void)
 	filehash_start_iterate(filehash, &it);
 	while ((entry = filehash_iterate(filehash, &it)) != NULL)
 	{
-		entry->action = decide_file_action(entry);
+		entry->action = decide_file_action(entry, last_common_segno, wal_segsz_bytes);
 	}
 
 	/*
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index df78a02e3da..ef1f539f1d1 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -11,6 +11,7 @@
 #include "datapagemap.h"
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
+#include "access/xlogdefs.h"
 
 /* these enum values are sorted in the order we want actions to be processed */
 typedef enum
@@ -36,6 +37,13 @@ typedef enum
 	FILE_TYPE_SYMLINK,
 } file_type_t;
 
+typedef enum
+{
+	FILE_CONTENT_TYPE_OTHER = 0,
+	FILE_CONTENT_TYPE_RELATION,
+	FILE_CONTENT_TYPE_WAL
+} file_content_type_t;
+
 /*
  * For every file found in the local or remote system, we have a file entry
  * that contains information about the file on both systems.  For relation
@@ -51,7 +59,7 @@ typedef struct file_entry_t
 	uint32		status;			/* hash status */
 
 	const char *path;
-	bool		isrelfile;		/* is it a relation data file? */
+	file_content_type_t content_type;
 
 	/*
 	 * Status of the file in the target.
@@ -106,7 +114,7 @@ extern void process_target_wal_block_change(ForkNumber forknum,
 											RelFileLocator rlocator,
 											BlockNumber blkno);
 
-extern filemap_t *decide_file_actions(void);
+extern filemap_t *decide_file_actions(XLogSegNo last_common_segno, int wal_segsz_bytes);
 extern void calculate_totals(filemap_t *filemap);
 extern void print_filemap(filemap_t *filemap);
 
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 9d16c1e6b47..aaee5ceba84 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -153,6 +153,7 @@ main(int argc, char **argv)
 	bool		rewind_needed;
 	bool		writerecoveryconf = false;
 	filemap_t  *filemap;
+	XLogSegNo	last_common_segno;
 
 	pg_logging_init(argv[0]);
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
@@ -397,6 +398,9 @@ main(int argc, char **argv)
 					LSN_FORMAT_ARGS(divergerec),
 					targetHistory[lastcommontliIndex].tli);
 
+		/* Convert divergence LSN to segment number */
+		XLByteToSeg(divergerec, last_common_segno, ControlFile_target.xlog_seg_size);
+
 		/*
 		 * Don't need the source history anymore. The target history is still
 		 * needed by the routines in parsexlog.c, when we read the target WAL.
@@ -492,7 +496,7 @@ main(int argc, char **argv)
 	 * We have collected all information we need from both systems. Decide
 	 * what to do with each file.
 	 */
-	filemap = decide_file_actions();
+	filemap = decide_file_actions(last_common_segno, ControlFile_target.xlog_seg_size);
 	if (showprogress)
 		calculate_totals(filemap);
 
diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl
index 28496afe350..ea279a9ee4b 100644
--- a/src/bin/pg_rewind/t/008_min_recovery_point.pl
+++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl
@@ -71,6 +71,9 @@ $node_3->start;
 # Wait until node 3 has connected and caught up
 $node_1->wait_for_catchup('node_3');
 
+# Current common WAL segment
+my $wal_seg_skipped = $node_1->safe_psql('postgres', 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+
 #
 # Swap the roles of node_1 and node_3, so that node_1 follows node_3.
 #
@@ -93,6 +96,10 @@ primary_conninfo='$node_3_connstr'
 ));
 $node_2->restart();
 
+# Confirm WAL segment has advanced for promotion
+my $advanced_wal_segment = $node_3->safe_psql('postgres', 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+isnt($wal_seg_skipped, $advanced_wal_segment, "Expected WAL segment to have advanced");
+
 #
 # Promote node_1, to create a split-brain scenario.
 #
@@ -140,14 +147,21 @@ copy(
 	"$node_2_pgdata/postgresql.conf",
 	"$tmp_folder/node_2-postgresql.conf.tmp");
 
-command_ok(
+command_checks_all(
 	[
 		'pg_rewind',
 		'--source-server' => $node_1_connstr,
 		'--target-pgdata' => $node_2_pgdata,
 		'--debug',
 	],
-	'run pg_rewind');
+	0,
+	[
+		#  qr/"WAL file entry $wal_seg_skipped not copied to target"/
+		qr//
+	],
+	[qr//],
+	'run pg_rewind'
+);
 
 # Now move back postgresql.conf with old settings
 move(
-- 
2.47.1

