Hi,

On Fri, Mar 7, 2025 at 9:35 PM Aleksander Alekseev
<aleksan...@timescale.com> wrote:
> Your patch should target the `master` branch. Also please add a
> corresponding entry to the nearest open commitfest [1].

OK, thanks for the notice! I attach the v2 patch for the `master`
branch to this letter. You can now also find it in the commitfest,
under the System Administration topic.

On Fri, Mar 7, 2025 at 9:35 PM Aleksander Alekseev
<aleksan...@timescale.com> wrote:
> > In my opinion, this will be useful primarily to simplify testing, since at 
> > the moment you have to create segments manually (as in this article).
>
> In this case you should add a test or two that demonstrate this. As
> separate commits perhaps.

Well, I just saw that people have a request for such functionality.
More specifically, I think it's worth taking a look at the
src/test/modules/xid_wraparound.
There, the transaction counter is just jumping forward to check the
autovacuum and postgres wraparound limits. These tests are using the
`xid_wraparound` extension, but it can be replaced with
new pg_resetwal feature.
Measurements on my machine showed that the test (advancing the xid up
to 10 billion in steps of 100 million) takes about 40% less time to
complete this way.

P.S.
v2 patch looks a bit scary, because it contains a lot of raw I/O
operations. At the moment, it is the best I came up with, but the
logic of the code (I hope) is correct.

--
Best regards,
Daniil Davydov
From bcca199a1787399c58c959a4357c2daefb8335f5 Mon Sep 17 00:00:00 2001
From: Daniil Davidov <d.davy...@postgrespro.ru>
Date: Tue, 11 Mar 2025 10:34:12 +0700
Subject: [PATCH v2] Arbitrary xid and mxid for resetwal

---
 src/bin/pg_resetwal/Makefile         |   4 +
 src/bin/pg_resetwal/pg_resetwal.c    | 338 ++++++++++++++++++++++++++-
 src/bin/pg_resetwal/t/003_advance.pl | 137 +++++++++++
 3 files changed, 476 insertions(+), 3 deletions(-)
 create mode 100644 src/bin/pg_resetwal/t/003_advance.pl

diff --git a/src/bin/pg_resetwal/Makefile b/src/bin/pg_resetwal/Makefile
index 82bea06dee5..1bd76180864 100644
--- a/src/bin/pg_resetwal/Makefile
+++ b/src/bin/pg_resetwal/Makefile
@@ -17,6 +17,10 @@ include $(top_builddir)/src/Makefile.global
 
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils
 
+# required for 003_advance.pl
+REGRESS_SHLIB=$(top_builddir)/src/test/regress/regress$(DLSUFFIX)
+export REGRESS_SHLIB
+
 OBJS = \
 	$(WIN32RES) \
 	pg_resetwal.o
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 31bc0abff16..18649127446 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -65,7 +65,7 @@ static bool guessed = false;	/* T if we had to guess at any values */
 static const char *progname;
 static uint32 set_xid_epoch = (uint32) -1;
 static TransactionId set_oldest_xid = 0;
-static TransactionId set_xid = 0;
+static uint64 set_xid = 0;
 static TransactionId set_oldest_commit_ts_xid = 0;
 static TransactionId set_newest_commit_ts_xid = 0;
 static Oid	set_oid = 0;
@@ -89,7 +89,41 @@ static void KillExistingArchiveStatus(void);
 static void KillExistingWALSummaries(void);
 static void WriteEmptyXLOG(void);
 static void usage(void);
+static void AdvanceNextXid(TransactionId oldval, TransactionId newval);
+static void AdvanceNextMultiXid(MultiXactId oldval, MultiXactId newval);
 
+/*
+ * Note: this structure is copied from commit_ts.c and should be kept in sync.
+ */
+typedef struct CommitTimestampEntry
+{
+	TimestampTz time;
+	RepOriginId nodeid;
+} CommitTimestampEntry;
+
+/*
+ * Note: these macros are copied from clog.c, commit_ts.c and subtrans.c and
+ * should be kept in sync.
+ */
+#define CLOG_BITS_PER_XACT	2
+#define CLOG_XACTS_PER_BYTE 4
+#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
+
+#define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
+#define TransactionIdToByte(xid)	(TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
+#define TransactionIdToBIndex(xid)	((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
+
+#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
+
+#define SizeOfCommitTimestampEntry (offsetof(CommitTimestampEntry, nodeid) + \
+									sizeof(RepOriginId))
+
+#define COMMIT_TS_XACTS_PER_PAGE \
+	(BLCKSZ / SizeOfCommitTimestampEntry)
+
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+#define SLRU_PAGES_PER_SEGMENT	32
 
 int
 main(int argc, char *argv[])
@@ -441,9 +475,47 @@ main(int argc, char *argv[])
 	}
 
 	if (set_xid != 0)
+	{
+		FullTransactionId current_fxid = ControlFile.checkPointCopy.nextXid;
+		FullTransactionId full_datfrozenxid;
+		uint32			  current_epoch;
+
+		full_datfrozenxid =
+			FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(current_fxid),
+											 ControlFile.checkPointCopy.oldestXid);
+
+		if (set_xid > full_datfrozenxid.value &&
+			(set_xid - full_datfrozenxid.value) > INT32_MAX)
+		{
+			/*
+			 * Cannot advance transaction ID in this case, because all unfrozen
+			 * transactions in cluster will be considered as 'future' for given
+			 * and all subsequent transaction IDs.
+			 */
+			pg_fatal("transaction ID (-x) cannot be ahead of datfrozenxid by %u", INT32_MAX);
+		}
+		else if (set_xid >= MaxTransactionId)
+		{
+			/*
+			 * Given transaction ID might exceed current epoch, so advance epoch
+			 * if needed.
+			 */
+			current_epoch = set_xid / MaxTransactionId;
+			set_xid = set_xid % MaxTransactionId;
+		}
+		else
+			current_epoch = EpochFromFullTransactionId(current_fxid);
+
 		ControlFile.checkPointCopy.nextXid =
-			FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid),
-											 set_xid);
+			FullTransactionIdFromEpochAndXid(current_epoch, set_xid);
+
+		if (FullTransactionIdPrecedes(current_fxid, ControlFile.checkPointCopy.nextXid) &&
+			!noupdate)
+		{
+			AdvanceNextXid(XidFromFullTransactionId(current_fxid),
+						   XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid));
+		}
+	}
 
 	if (set_oldest_commit_ts_xid != 0)
 		ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
@@ -455,12 +527,19 @@ main(int argc, char *argv[])
 
 	if (set_mxid != 0)
 	{
+		MultiXactId current_mxid = ControlFile.checkPointCopy.nextMulti;
 		ControlFile.checkPointCopy.nextMulti = set_mxid;
 
 		ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
 		if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
 			ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
 		ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
+
+		/*
+		 * Advance only if current_mxid logically precedes set_mxid (wraparound-aware).
+		 */
+		if (((int32) (current_mxid - set_mxid) < 0) && !noupdate)
+			AdvanceNextMultiXid(current_mxid, set_mxid);
 	}
 
 	if (set_mxoff != -1)
@@ -1218,3 +1297,256 @@ usage(void)
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
 }
+
+/*
+ * Calculate how many xacts can fit one page of given SLRU type.
+ */
+static int64
+calculate_xacts_per_page(char *slru_type)
+{
+	int64 result = -1;
+
+	if (strcmp(slru_type, "pg_xact") == 0)
+		result = CLOG_XACTS_PER_PAGE;
+	else if (strcmp(slru_type, "pg_commit_ts") == 0)
+		result = COMMIT_TS_XACTS_PER_PAGE;
+	else if (strcmp(slru_type, "pg_subtrans") == 0)
+		result = SUBTRANS_XACTS_PER_PAGE;
+	else if (strcmp(slru_type, "pg_multixact/offsets") == 0)
+		result = MULTIXACT_OFFSETS_PER_PAGE;
+	else
+		pg_fatal("unknown SLRU type : %s", slru_type);
+
+	return result;
+}
+
+/*
+ * Fill given SLRU segment with zeroes.
+ */
+static void
+zero_segment(int fd, char *path)
+{
+	char zeroes[BLCKSZ] = {0};
+
+	for (int i = 0; i < SLRU_PAGES_PER_SEGMENT; i++)
+	{
+		errno = 0;
+		if (write(fd, zeroes, BLCKSZ) != BLCKSZ)
+		{
+			if (errno == 0)
+				errno = ENOSPC;
+			pg_fatal("could not write file \"%s\": %m", path);
+		}
+	}
+}
+
+/*
+ * Fill entry for given transaction ID with zeroes in clog.
+ */
+static void
+zero_clog_xact_info(int fd, char *path, char *slru_type, TransactionId xid)
+{
+	int64 pageno;
+	int byteno = TransactionIdToByte(xid);
+	int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
+	char *byteptr;
+	char byteval;
+	int status = 0x00;
+	char buff[BLCKSZ];
+
+	pageno = (xid / CLOG_XACTS_PER_PAGE) % SLRU_PAGES_PER_SEGMENT;
+
+	if (lseek(fd, pageno * BLCKSZ, SEEK_SET) != pageno * BLCKSZ)
+		pg_fatal("could not iterate through file \"%s\": %m", path);
+
+	if (read(fd, buff, BLCKSZ) != BLCKSZ)
+		pg_fatal("could not read file \"%s\": %m", path);
+
+	byteptr = buff + byteno;
+
+	byteval = *byteptr;
+	byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift);
+	byteval |= (status << bshift);
+	*byteptr = byteval;
+
+	if (write(fd, buff, BLCKSZ) != BLCKSZ)
+	{
+		if (errno == 0)
+			errno = ENOSPC;
+		pg_fatal("could not write file \"%s\": %m", path);
+	}
+}
+
+/*
+ * Fill entry for given transaction ID with zeroes in specified SLRU type.
+ */
+static void
+zero_xact_info(int fd, char *path, char *slru_type, TransactionId xid)
+{
+	int  	offset		= 0,
+			entry_size	= 0;
+	int64	pageno;
+	char	buff[BLCKSZ];
+
+	if (strcmp(slru_type, "pg_xact") == 0)
+	{
+		zero_clog_xact_info(fd, path, slru_type, xid);
+		return;
+	}
+	else if (strcmp(slru_type, "pg_commit_ts") == 0)
+	{
+		entry_size	= SizeOfCommitTimestampEntry;
+		offset		= (xid % COMMIT_TS_XACTS_PER_PAGE) * entry_size;
+		pageno		= xid / COMMIT_TS_XACTS_PER_PAGE;
+	}
+	else if (strcmp(slru_type, "pg_subtrans") == 0)
+	{
+		entry_size	= sizeof(TransactionId);
+		offset		= (xid % SUBTRANS_XACTS_PER_PAGE) * entry_size;
+		pageno		= xid / SUBTRANS_XACTS_PER_PAGE;
+	}
+	else if (strcmp(slru_type, "pg_multixact/offsets") == 0)
+	{
+		entry_size	= sizeof(MultiXactOffset);
+		offset		= (xid % MULTIXACT_OFFSETS_PER_PAGE) * entry_size;
+		pageno		= xid / MULTIXACT_OFFSETS_PER_PAGE;
+	}
+	else
+		pg_fatal("unknown SLRU type : %s", slru_type);
+
+	if (lseek(fd, pageno * BLCKSZ, SEEK_SET) != pageno * BLCKSZ)
+		pg_fatal("could not iterate through file \"%s\": %m", path);
+
+	if (read(fd, buff, BLCKSZ) != BLCKSZ)
+		pg_fatal("could not read file \"%s\": %m", path);
+
+	memset(buff + offset, 0, entry_size);
+
+	if (write(fd, buff, BLCKSZ) != BLCKSZ)
+	{
+		if (errno == 0)
+			errno = ENOSPC;
+		pg_fatal("could not write file \"%s\": %m", path);
+	}
+}
+
+/*
+ * Make sure that given xid has entry in specified SLRU type.
+ */
+static void
+enlarge_slru(TransactionId xid, char *dir)
+{
+	char  path[MAXPGPATH];
+	int   fd,
+		  flags = O_RDWR | O_APPEND | O_EXCL | PG_BINARY;
+	int64 segno,
+		  pageno,
+		  xacts_per_page;
+
+	xacts_per_page = calculate_xacts_per_page(dir);
+	pageno = xid / xacts_per_page;
+	segno = pageno / SLRU_PAGES_PER_SEGMENT;
+
+	snprintf(path, MAXPGPATH, "%s/%04X", dir, (unsigned int) segno);
+
+	errno = 0;
+	if (access(path, F_OK) != 0)
+	{
+		if (errno != ENOENT)
+			pg_fatal("cannot access file \"%s\" : %m", path);
+
+		flags |= O_CREAT;
+	}
+
+	/*
+	 * Create or open segment file
+	 */
+	fd = open(path, flags, pg_file_create_mode);
+	if (fd < 0)
+		pg_fatal("could not create/open file \"%s\": %m", path);
+
+	/*
+	 * If segment doesn't exist - create segment and fill all its pages
+	 * with zeroes.
+	 */
+	if (flags & O_CREAT)
+		zero_segment(fd, path);
+	/*
+	 * If segment already exists - fill with zeroes given transaction's
+	 * entry in it.
+	 */
+	else
+		zero_xact_info(fd, path, dir, xid);
+
+	if (fsync(fd) != 0)
+		pg_fatal("fsync error: %m");
+
+	close(fd);
+}
+
+/*
+ * Extend clog so that it can accommodate statuses of all transactions from
+ * oldval to newval.
+ */
+static void
+AdvanceNextXid(TransactionId oldval, TransactionId newval)
+{
+	int64 current_segno = -1, /* last existing slru segment */
+		  pageno,
+		  segno;
+
+	if (newval < oldval) /* handle wraparound */
+		oldval = FirstNormalTransactionId;
+	else /* oldval already has entry in clog */
+		oldval += 1;
+
+	for (TransactionId xid = oldval; xid <= newval; xid++)
+	{
+		pageno = xid / CLOG_XACTS_PER_PAGE;
+		segno = pageno / SLRU_PAGES_PER_SEGMENT;
+
+		/*
+		 * We already zeroed all necessary pages in this segment during
+		 * previous xid processing.
+		 */
+		if (segno == current_segno)
+			continue;
+
+		enlarge_slru(xid, "pg_xact");
+
+		current_segno = segno;
+	}
+
+	pageno = newval / COMMIT_TS_XACTS_PER_PAGE;
+	if (pageno > (oldval / COMMIT_TS_XACTS_PER_PAGE))
+	{
+		enlarge_slru(newval, "pg_commit_ts");
+	}
+
+	pageno = (newval / SUBTRANS_XACTS_PER_PAGE);
+	if (pageno > (oldval / SUBTRANS_XACTS_PER_PAGE))
+	{
+		enlarge_slru(newval, "pg_subtrans");
+	}
+}
+
+static void
+AdvanceNextMultiXid(MultiXactId oldval, MultiXactId newval)
+{
+	int64 current_segno = -1,
+		  pageno,
+		  segno;
+
+	for (MultiXactId mxid = oldval + 1; mxid <= newval; mxid++)
+	{
+		pageno = mxid / MULTIXACT_OFFSETS_PER_PAGE;
+		segno = pageno / SLRU_PAGES_PER_SEGMENT;
+
+		if (segno == current_segno)
+			continue;
+
+		enlarge_slru(mxid, "pg_multixact/offsets");
+
+		current_segno = segno;
+	}
+}
diff --git a/src/bin/pg_resetwal/t/003_advance.pl b/src/bin/pg_resetwal/t/003_advance.pl
new file mode 100644
index 00000000000..a28e994b10c
--- /dev/null
+++ b/src/bin/pg_resetwal/t/003_advance.pl
@@ -0,0 +1,137 @@
+use strict;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Basename;
+
+#
+# Check whether we can set arbitrarily large values for m,o,x options
+#
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init();
+$node->start();
+
+my $data_dir = $node->data_dir;
+
+# Run the regression tests
+sub run_regression
+{
+	my $dlpath = dirname($ENV{REGRESS_SHLIB});
+	my $pgregress = $ENV{PG_REGRESS};
+	my $outputdir = $PostgreSQL::Test::Utils::tmp_check;
+
+	my $rc =
+	  system($ENV{PG_REGRESS}
+	  	  . " "
+		  . "--dlpath=\"$dlpath\" "
+		  . "--bindir= "
+		  . "--host="
+		  . $node->host . " "
+		  . "--port="
+		  . $node->port . " "
+		  . "--schedule=$dlpath/parallel_schedule "
+		  . "--max-concurrent-tests=20 "
+		  . "--inputdir=\"$dlpath\" "
+		  . "--outputdir=\"$outputdir\"");
+	if ($rc != 0)
+	{
+		# Dump out the regression diffs file, if there is one
+		my $diffs = "$outputdir/regression.diffs";
+		if (-e $diffs)
+		{
+			print "=== dumping $diffs ===\n";
+			print slurp_file($diffs);
+			print "=== EOF ===\n";
+		}
+	}
+	is($rc, 0, 'regression tests pass');
+}
+
+#
+# Test -x option
+#
+
+$node->safe_psql('postgres', q(
+	CREATE TABLE test (
+		int_data  INT
+	);
+	INSERT INTO test SELECT generate_series(1, 1000);
+	BEGIN;
+	DROP TABLE test;
+	ABORT;
+));
+
+my $last_xid = $node->safe_psql('postgres', q( SELECT txid_current(); ));
+
+# Advance next xid so that it doesn't fit on existing slru segment
+my $next_xid = 0;
+for (my $count = 1; $count < 20; $count++)
+{
+	$next_xid += 500_000_000;
+
+	$node->stop();
+	system_or_bail("pg_resetwal -D $data_dir -x $next_xid");
+	$node->start();
+
+	$node->safe_psql('postgres', q(
+		VACUUM FREEZE;
+	));
+}
+
+# Check whether postgres recognized statuses of all previous transactions
+# correctly
+my $tuples_num = $node->safe_psql('postgres', q(
+	SELECT COUNT(*) FROM test;
+));
+ok($tuples_num == 1000, "we can see table 'test' and all tuples in it");
+
+#
+# Test -o option
+#
+
+my $next_oid = 100_000;
+
+$node->stop();
+system_or_bail("pg_resetwal -D $data_dir -o $next_oid");
+$node->start();
+
+$node->safe_psql('postgres', q(
+	CREATE TABLE test1 (
+		int_data INT
+	);
+));
+
+my $advanced_oid = $node->safe_psql('postgres', q(
+	SELECT oid FROM pg_class WHERE relname = 'test1';
+));
+ok($advanced_oid >= $next_oid, "oid was advanced succesfully");
+
+#
+# Test -m option
+#
+
+# Advance next multi xid so that it doesn't fit on existing slru segment
+my $next_mxid = 4_000_000;
+my $oldest_mxid = 100;
+
+$node->stop();
+system_or_bail("pg_resetwal -D $data_dir -m $next_mxid,$oldest_mxid");
+$node->start();
+
+# Check whether all works properly
+$node->safe_psql('postgres', q(
+	CREATE TABLE test2 (
+		int_data INT
+	);
+	INSERT INTO test2 SELECT generate_series(1, 1000);
+));
+
+#
+# Run regression tests to make sure that postgres is working normally after all
+# manipulations
+#
+run_regression();
+
+$node->stop();
+done_testing();
-- 
2.43.0

Reply via email to