Hi Sutou,

Thank you for picking it up!

On Fri, 12 Jul 2024 at 09:24, Sutou Kouhei <k...@clear-code.com> wrote:

Here are my review comments:
>
> @@ -217,6 +221,26 @@ findLastCheckpoint(const char *datadir, XLogRecPtr
> forkptr, int tliIndex,
> +                       char            xlogfname[MAXFNAMELEN];
> +
> +                       tli = xlogreader->seg.ws_tli;
> +                       segno = xlogreader->seg.ws_segno;
> +
> +                       snprintf(xlogfname, MAXPGPATH, XLOGDIR "/");
> +                       XLogFileName(xlogfname + strlen(xlogfname),
> +                                                xlogreader->seg.ws_tli,
> +                                                xlogreader->seg.ws_segno,
> WalSegSz);
> +
> +                       /*
> +                        * Make sure pg_rewind doesn't remove this file,
> because it is
> +                        * required for postgres to start after rewind.
> +                        */
> +                       insert_keepwalhash_entry(xlogfname);
>
> MAXFNAMELEN is 64 and MAXPGPATH is 1024. strlen(XLOGDIR "/")
> is 7 because XLOGDIR is "pg_wal". So xlogfname has enough
> size but snprintf(xlogfname, MAXPGPATH) is wrong usage.
> (And XLogFileName() uses snprintf(xlogfname, MAXFNAMELEN)
> internally.)
>

Nice catch!

I don't think we need another buffer here, just need to use MAXFNAMELEN,
because strlen("pg_wal/$wal_filename") + 1 = 32 perfectly fits into 64
bytes.

The new version is attached.

Regards,
--
Alexander Kukushkin
From 19e9a3472782bdc92d0811e27cc3dd0fdcbf965d Mon Sep 17 00:00:00 2001
From: Alexander Kukushkin <cyberd...@gmail.com>
Date: Fri, 12 Jul 2024 10:50:22 +0200
Subject: [PATCH v10] Be more picky with WAL segment deletion in pg_rewind

Make pg_rewind to be a bit wiser in terms of creating filemap:
preserve on the target all WAL segments that contain records between the
last common checkpoint and the point of divergence.

Co-authored-by: Polina Bungina <bung...@gmail.com>
---
 src/bin/pg_rewind/filemap.c                   | 62 +++++++++++++++++-
 src/bin/pg_rewind/filemap.h                   |  3 +
 src/bin/pg_rewind/meson.build                 |  1 +
 src/bin/pg_rewind/parsexlog.c                 | 24 +++++++
 src/bin/pg_rewind/pg_rewind.c                 |  3 +
 src/bin/pg_rewind/t/010_keep_recycled_wals.pl | 65 +++++++++++++++++++
 6 files changed, 157 insertions(+), 1 deletion(-)
 create mode 100644 src/bin/pg_rewind/t/010_keep_recycled_wals.pl

diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 4458324c9d..b357c28338 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -63,6 +63,28 @@ static file_entry_t *lookup_filehash_entry(const char *path);
 static int	final_filemap_cmp(const void *a, const void *b);
 static bool check_file_excluded(const char *path, bool is_source);
 
+typedef struct skipwal_t
+{
+	const char *path;
+	uint32		status;
+}			skipwal_t;
+
+#define SH_PREFIX		keepwalhash
+#define SH_ELEMENT_TYPE	skipwal_t
+#define SH_KEY_TYPE		const char *
+#define	SH_KEY			path
+#define SH_HASH_KEY(tb, key)	hash_string(key)
+#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
+#define	SH_SCOPE		static inline
+#define SH_RAW_ALLOCATOR	pg_malloc0
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
+
+static keepwalhash_hash * keepwalhash = NULL;
+
+static bool keepwalhash_entry_exists(const char *path);
+
 /*
  * Definition of one element part of an exclusion list, used to exclude
  * contents when rewinding.  "name" is the name of the file or path to
@@ -206,6 +228,35 @@ lookup_filehash_entry(const char *path)
 	return filehash_lookup(filehash, path);
 }
 
+/* Initialize a hash table to store WAL file names that must be kept */
+void
+keepwalhash_init(void)
+{
+	keepwalhash = keepwalhash_create(FILEHASH_INITIAL_SIZE, NULL);
+}
+
+/* Prevent a given file deletion during rewind */
+void
+insert_keepwalhash_entry(const char *path)
+{
+	skipwal_t  *entry;
+	bool		found;
+
+	/* Should only be called with keepwalhash initialized */
+	Assert(keepwalhash);
+
+	entry = keepwalhash_insert(keepwalhash, path, &found);
+
+	if (!found)
+		entry->path = pg_strdup(path);
+}
+
+static bool
+keepwalhash_entry_exists(const char *path)
+{
+	return keepwalhash_lookup(keepwalhash, path) != NULL;
+}
+
 /*
  * Callback for processing source file list.
  *
@@ -685,7 +736,16 @@ decide_file_action(file_entry_t *entry)
 	}
 	else if (entry->target_exists && !entry->source_exists)
 	{
-		/* File exists in target, but not source. Remove it. */
+		/* File exists in target, but not source. */
+
+		if (keepwalhash_entry_exists(path))
+		{
+			/* This is a WAL file that should be kept. */
+			pg_log_debug("Not removing %s because it is required for recovery", path);
+			return FILE_ACTION_NONE;
+		}
+
+		/* Otherwise remove an unexpected file. */
 		return FILE_ACTION_REMOVE;
 	}
 	else if (!entry->target_exists && !entry->source_exists)
diff --git a/src/bin/pg_rewind/filemap.h b/src/bin/pg_rewind/filemap.h
index 007e0f17cf..0cb6fcae00 100644
--- a/src/bin/pg_rewind/filemap.h
+++ b/src/bin/pg_rewind/filemap.h
@@ -110,4 +110,7 @@ extern filemap_t *decide_file_actions(void);
 extern void calculate_totals(filemap_t *filemap);
 extern void print_filemap(filemap_t *filemap);
 
+extern void keepwalhash_init(void);
+extern void insert_keepwalhash_entry(const char *path);
+
 #endif							/* FILEMAP_H */
diff --git a/src/bin/pg_rewind/meson.build b/src/bin/pg_rewind/meson.build
index e0f88bde22..200ebf84eb 100644
--- a/src/bin/pg_rewind/meson.build
+++ b/src/bin/pg_rewind/meson.build
@@ -43,6 +43,7 @@ tests += {
       't/007_standby_source.pl',
       't/008_min_recovery_point.pl',
       't/009_growing_files.pl',
+      't/010_keep_recycled_wals.pl',
     ],
   },
 }
diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c
index 22f7351fdc..64d584db4d 100644
--- a/src/bin/pg_rewind/parsexlog.c
+++ b/src/bin/pg_rewind/parsexlog.c
@@ -176,6 +176,10 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 	char	   *errormsg;
 	XLogPageReadPrivate private;
 
+	/* Track WAL segments opened while searching a checkpoint */
+	XLogSegNo	segno = 0;
+	TimeLineID	tli = 0;
+
 	/*
 	 * The given fork pointer points to the end of the last common record,
 	 * which is not necessarily the beginning of the next record, if the
@@ -217,6 +221,26 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
 						 LSN_FORMAT_ARGS(searchptr));
 		}
 
+		/* We are trying to detect if the new WAL file was opened */
+		if (xlogreader->seg.ws_tli != tli || xlogreader->seg.ws_segno != segno)
+		{
+			char		xlogfname[MAXFNAMELEN];
+
+			snprintf(xlogfname, MAXFNAMELEN, XLOGDIR "/");
+
+			/* memorize new values of tli and segno */
+			tli = xlogreader->seg.ws_tli;
+			segno = xlogreader->seg.ws_segno;
+
+			XLogFileName(xlogfname + sizeof(XLOGDIR), tli, segno, WalSegSz);
+
+			/*
+			 * Make sure pg_rewind doesn't remove this file, because it is
+			 * required for postgres to start after rewind.
+			 */
+			insert_keepwalhash_entry(xlogfname);
+		}
+
 		/*
 		 * Check if it is a checkpoint record. This checkpoint record needs to
 		 * be the latest checkpoint before WAL forked and not the checkpoint
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 0841ab4135..48c11417b2 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -455,6 +455,9 @@ main(int argc, char **argv)
 		exit(0);
 	}
 
+	/* Hash to memorize WAL files that should be kept */
+	keepwalhash_init();
+
 	findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
 					   &chkptrec, &chkpttli, &chkptredo, restore_command);
 	pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u",
diff --git a/src/bin/pg_rewind/t/010_keep_recycled_wals.pl b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl
new file mode 100644
index 0000000000..65caaf2faa
--- /dev/null
+++ b/src/bin/pg_rewind/t/010_keep_recycled_wals.pl
@@ -0,0 +1,65 @@
+
+# Copyright (c) 2021-2024, PostgreSQL Global Development Group
+
+#
+# Test situation where a target data directory contains
+# WAL files that were already recycled by the new primary.
+#
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+RewindTest::setup_cluster();
+$node_primary->enable_archiving();
+RewindTest::start_primary();
+
+RewindTest::create_standby();
+$node_standby->enable_restoring($node_primary, 0);
+$node_standby->reload();
+
+RewindTest::primary_psql("CHECKPOINT");  # last common checkpoint
+
+# We use "perl -e 'exit(1)'" as an alternative to "false", because the last one
+# might not be available on Windows, but we want to run tests cross-platform.
+my $false = "$^X -e 'exit(1)'";
+$node_primary->append_conf(
+	'postgresql.conf', qq(
+archive_command = '$false'
+));
+$node_primary->reload();
+
+# advance WAL on the primary; WAL segment will never make it to the archive
+RewindTest::primary_psql("CREATE TABLE t(a int)");
+RewindTest::primary_psql("INSERT INTO t VALUES(0)");
+RewindTest::primary_psql("SELECT pg_switch_wal()");
+
+RewindTest::promote_standby;
+
+# new primary loses diverging WAL segment
+RewindTest::standby_psql("INSERT INTO t values(0)");
+RewindTest::standby_psql("SELECT pg_switch_wal()");
+
+$node_standby->stop();
+$node_primary->stop();
+
+my ($stdout, $stderr) = run_command(
+	[
+		'pg_rewind', '--debug',
+		'--source-pgdata', $node_standby->data_dir,
+		'--target-pgdata', $node_primary->data_dir,
+		'--no-sync',
+	]);
+
+like(
+	$stderr,
+	qr/Not removing pg_wal.* because it is required for recovery/,
+	"some WAL files were skipped");
+
+done_testing();
-- 
2.34.1

Reply via email to