Hi, On Fri, Mar 7, 2025 at 9:35 PM Aleksander Alekseev <aleksan...@timescale.com> wrote: > Your patch should target the `master` branch. Also please add a > corresponding entry to the nearest open commitfest [1].
OK, thanks for the notice! I attach the v2 patch for the `master` branch to this letter. Now you can also find it in the commitfest under the System Administration topic. On Fri, Mar 7, 2025 at 9:35 PM Aleksander Alekseev <aleksan...@timescale.com> wrote: > > In my opinion, this will be useful primarily to simplify testing, since at > > the moment you have to create segments manually (as in this article). > > In this case you should add a test or two that demonstrate this. As > separate commits perhaps. Well, I just saw that people have a request for such functionality. More specifically, I think it's worth taking a look at the src/test/modules/xid_wraparound. There, the transaction counter is just jumping forward to check the autovacuum and postgres wraparound limits. These tests are using the `xid_wraparound` extension, but it can be replaced with the new pg_resetwal feature. Measurements on my machine showed that the test (advancing xid up to 10 billion in steps of 100 million) takes 40% less time to complete. P.S. v2 patch looks a bit scary, because it contains a lot of raw I/O operations. At the moment, it is the best I came up with, but the logic of the code (I hope) is correct. -- Best regards, Daniil Davydov
From bcca199a1787399c58c959a4357c2daefb8335f5 Mon Sep 17 00:00:00 2001 From: Daniil Davidov <d.davy...@postgrespro.ru> Date: Tue, 11 Mar 2025 10:34:12 +0700 Subject: [PATCH v2] Arbitrary xid and mxid for resetwal --- src/bin/pg_resetwal/Makefile | 4 + src/bin/pg_resetwal/pg_resetwal.c | 338 ++++++++++++++++++++++++++- src/bin/pg_resetwal/t/003_advance.pl | 137 +++++++++++ 3 files changed, 476 insertions(+), 3 deletions(-) create mode 100644 src/bin/pg_resetwal/t/003_advance.pl diff --git a/src/bin/pg_resetwal/Makefile b/src/bin/pg_resetwal/Makefile index 82bea06dee5..1bd76180864 100644 --- a/src/bin/pg_resetwal/Makefile +++ b/src/bin/pg_resetwal/Makefile @@ -17,6 +17,10 @@ include $(top_builddir)/src/Makefile.global LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils +# required for 003_advance.pl +REGRESS_SHLIB=$(top_builddir)/src/test/regress/regress$(DLSUFFIX) +export REGRESS_SHLIB + OBJS = \ $(WIN32RES) \ pg_resetwal.o diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 31bc0abff16..18649127446 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -65,7 +65,7 @@ static bool guessed = false; /* T if we had to guess at any values */ static const char *progname; static uint32 set_xid_epoch = (uint32) -1; static TransactionId set_oldest_xid = 0; -static TransactionId set_xid = 0; +static uint64 set_xid = 0; static TransactionId set_oldest_commit_ts_xid = 0; static TransactionId set_newest_commit_ts_xid = 0; static Oid set_oid = 0; @@ -89,7 +89,41 @@ static void KillExistingArchiveStatus(void); static void KillExistingWALSummaries(void); static void WriteEmptyXLOG(void); static void usage(void); +static void AdvanceNextXid(TransactionId oldval, TransactionId newval); +static void AdvanceNextMultiXid(MultiXactId oldval, MultiXactId newval); +/* + * Note: this structure is copied from commit_ts.c and should be kept in sync. 
+ */ +typedef struct CommitTimestampEntry +{ + TimestampTz time; + RepOriginId nodeid; +} CommitTimestampEntry; + +/* + * Note: these macros are copied from clog.c, commit_ts.c and subtrans.c and + * should be kept in sync. + */ +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) + +#define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) +#define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE) +#define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE) + +#define SUBTRANS_XACTS_PER_PAGE (BLCKSZ / sizeof(TransactionId)) + +#define SizeOfCommitTimestampEntry (offsetof(CommitTimestampEntry, nodeid) + \ + sizeof(RepOriginId)) + +#define COMMIT_TS_XACTS_PER_PAGE \ + (BLCKSZ / SizeOfCommitTimestampEntry) + +#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset)) + +#define SLRU_PAGES_PER_SEGMENT 32 int main(int argc, char *argv[]) @@ -441,9 +475,47 @@ main(int argc, char *argv[]) } if (set_xid != 0) + { + FullTransactionId current_fxid = ControlFile.checkPointCopy.nextXid; + FullTransactionId full_datfrozenxid; + uint32 current_epoch; + + full_datfrozenxid = + FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(current_fxid), + ControlFile.checkPointCopy.oldestXid); + + if (set_xid > full_datfrozenxid.value && + (set_xid - full_datfrozenxid.value) > INT32_MAX) + { + /* + * Cannot advance transaction ID in this case, because all unfrozen + * transactions in cluster will be considered as 'future' for given + * and all subsequent transaction IDs. + */ + pg_fatal("transaction ID (-x) cannot be ahead of datfrozenxid by %u", INT32_MAX); + } + else if (set_xid >= MaxTransactionId) + { + /* + * Given transaction ID might exceed current epoch, so advance epoch + * if needed. 
+ */ + current_epoch = set_xid / MaxTransactionId; + set_xid = set_xid % MaxTransactionId; + } + else + current_epoch = EpochFromFullTransactionId(current_fxid); + ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), - set_xid); + FullTransactionIdFromEpochAndXid(current_epoch, set_xid); + + if (FullTransactionIdPrecedes(current_fxid, ControlFile.checkPointCopy.nextXid) && + !noupdate) + { + AdvanceNextXid(XidFromFullTransactionId(current_fxid), + XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); + } + } if (set_oldest_commit_ts_xid != 0) ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid; @@ -455,12 +527,19 @@ main(int argc, char *argv[]) if (set_mxid != 0) { + MultiXactId current_mxid = ControlFile.checkPointCopy.nextMulti; ControlFile.checkPointCopy.nextMulti = set_mxid; ControlFile.checkPointCopy.oldestMulti = set_oldestmxid; if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId) ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId; ControlFile.checkPointCopy.oldestMultiDB = InvalidOid; + + /* + * If current_mxid precedes set_mxid. + */ + if (((int32) (current_mxid - set_mxid) < 0) && !noupdate) + AdvanceNextMultiXid(current_mxid, set_mxid); } if (set_mxoff != -1) @@ -1218,3 +1297,256 @@ usage(void) printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); } + +/* + * Calculate how many xacts can fit one page of given SLRU type. 
+ */ +static int64 +calculate_xacts_per_page(char *slru_type) +{ + int64 result = -1; + + if (strcmp(slru_type, "pg_xact") == 0) + result = CLOG_XACTS_PER_PAGE; + else if (strcmp(slru_type, "pg_commit_ts") == 0) + result = COMMIT_TS_XACTS_PER_PAGE; + else if (strcmp(slru_type, "pg_subtrans") == 0) + result = SUBTRANS_XACTS_PER_PAGE; + else if (strcmp(slru_type, "pg_multixact/offsets") == 0) + result = MULTIXACT_OFFSETS_PER_PAGE; + else + pg_fatal("unknown SLRU type : %s", slru_type); + + return result; +} + +/* + * Fill given SLRU segment with zeroes. + */ +static void +zero_segment(int fd, char *path) +{ + char zeroes[BLCKSZ] = {0}; + + for (int i = 0; i < SLRU_PAGES_PER_SEGMENT; i++) + { + errno = 0; + if (write(fd, zeroes, BLCKSZ) != BLCKSZ) + { + if (errno == 0) + errno = ENOSPC; + pg_fatal("could not write file \"%s\": %m", path); + } + } +} + +/* + * Fill entry for given transaction ID with zeroes in clog. + */ +static void +zero_clog_xact_info(int fd, char *path, char *slru_type, TransactionId xid) +{ + int64 pageno; + int byteno = TransactionIdToByte(xid); + int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; + char *byteptr; + char byteval; + int status = 0x00; + char buff[BLCKSZ]; + + pageno = (xid / CLOG_XACTS_PER_PAGE) % SLRU_PAGES_PER_SEGMENT; + + if (lseek(fd, pageno * BLCKSZ, SEEK_SET) != pageno * BLCKSZ) + pg_fatal("could not iterate through file \"%s\": %m", path); + + if (read(fd, buff, BLCKSZ) != BLCKSZ) + pg_fatal("could not read file \"%s\": %m", path); + + byteptr = buff + byteno; + + byteval = *byteptr; + byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift); + byteval |= (status << bshift); + *byteptr = byteval; + + if (write(fd, buff, BLCKSZ) != BLCKSZ) + { + if (errno == 0) + errno = ENOSPC; + pg_fatal("could not write file \"%s\": %m", path); + } +} + +/* + * Fill entry for given transaction ID with zeroes in specified SLRU type. 
+ */ +static void +zero_xact_info(int fd, char *path, char *slru_type, TransactionId xid) +{ + int offset = 0, + entry_size = 0; + int64 pageno; + char buff[BLCKSZ]; + + if (strcmp(slru_type, "pg_xact") == 0) + { + zero_clog_xact_info(fd, path, slru_type, xid); + return; + } + else if (strcmp(slru_type, "pg_commit_ts") == 0) + { + entry_size = SizeOfCommitTimestampEntry; + offset = (xid % COMMIT_TS_XACTS_PER_PAGE) * entry_size; + pageno = xid / COMMIT_TS_XACTS_PER_PAGE; + } + else if (strcmp(slru_type, "pg_subtrans") == 0) + { + entry_size = sizeof(TransactionId); + offset = (xid % SUBTRANS_XACTS_PER_PAGE) * entry_size; + pageno = xid / SUBTRANS_XACTS_PER_PAGE; + } + else if (strcmp(slru_type, "pg_multixact/offsets") == 0) + { + entry_size = sizeof(MultiXactOffset); + offset = (xid % MULTIXACT_OFFSETS_PER_PAGE) * entry_size; + pageno = xid / MULTIXACT_OFFSETS_PER_PAGE; + } + else + pg_fatal("unknown SLRU type : %s", slru_type); + + if (lseek(fd, pageno * BLCKSZ, SEEK_SET) != pageno * BLCKSZ) + pg_fatal("could not iterate through file \"%s\": %m", path); + + if (read(fd, buff, BLCKSZ) != BLCKSZ) + pg_fatal("could not read file \"%s\": %m", path); + + memset(buff + offset, 0, entry_size); + + if (write(fd, buff, BLCKSZ) != BLCKSZ) + { + if (errno == 0) + errno = ENOSPC; + pg_fatal("could not write file \"%s\": %m", path); + } +} + +/* + * Make sure that given xid has entry in specified SLRU type. 
+ */ +static void +enlarge_slru(TransactionId xid, char *dir) +{ + char path[MAXPGPATH]; + int fd, + flags = O_RDWR | O_APPEND | O_EXCL | PG_BINARY; + int64 segno, + pageno, + xacts_per_page; + + xacts_per_page = calculate_xacts_per_page(dir); + pageno = xid / xacts_per_page; + segno = pageno / SLRU_PAGES_PER_SEGMENT; + + snprintf(path, MAXPGPATH, "%s/%04X", dir, (unsigned int) segno); + + errno = 0; + if (access(path, F_OK) != 0) + { + if (errno != ENOENT) + pg_fatal("cannot access file \"%s\" : %m", path); + + flags |= O_CREAT; + } + + /* + * Create or open segment file + */ + fd = open(path, flags, pg_file_create_mode); + if (fd < 0) + pg_fatal("could not create/open file \"%s\": %m", path); + + /* + * If segment doesn't exist - create segment and fill all its pages + * with zeroes. + */ + if (flags & O_CREAT) + zero_segment(fd, path); + /* + * If segment already exists - fill with zeroes given transaction's + * entry in it. + */ + else + zero_xact_info(fd, path, dir, xid); + + if (fsync(fd) != 0) + pg_fatal("fsync error: %m"); + + close(fd); +} + +/* + * Extend clog so that it can accommodate statuses of all transactions from + * oldval to newval. + */ +static void +AdvanceNextXid(TransactionId oldval, TransactionId newval) +{ + int64 current_segno = -1, /* last existing slru segment */ + pageno, + segno; + + if (newval < oldval) /* handle wraparound */ + oldval = FirstNormalTransactionId; + else /* oldval already has entry in clog */ + oldval += 1; + + for (TransactionId xid = oldval; xid <= newval; xid++) + { + pageno = xid / CLOG_XACTS_PER_PAGE; + segno = pageno / SLRU_PAGES_PER_SEGMENT; + + /* + * We already zeroed all necessary pages in this segment during + * previous xid processing. 
+ */ + if (segno == current_segno) + continue; + + enlarge_slru(xid, "pg_xact"); + + current_segno = segno; + } + + pageno = newval / COMMIT_TS_XACTS_PER_PAGE; + if (pageno > (oldval / COMMIT_TS_XACTS_PER_PAGE)) + { + enlarge_slru(newval, "pg_commit_ts"); + } + + pageno = (newval / SUBTRANS_XACTS_PER_PAGE); + if (pageno > (oldval / SUBTRANS_XACTS_PER_PAGE)) + { + enlarge_slru(newval, "pg_subtrans"); + } +} + +static void +AdvanceNextMultiXid(MultiXactId oldval, MultiXactId newval) +{ + int64 current_segno = -1, + pageno, + segno; + + for (MultiXactId mxid = oldval + 1; mxid <= newval; mxid++) + { + pageno = mxid / MULTIXACT_OFFSETS_PER_PAGE; + segno = pageno / SLRU_PAGES_PER_SEGMENT; + + if (segno == current_segno) + continue; + + enlarge_slru(mxid, "pg_multixact/offsets"); + + current_segno = segno; + } +} diff --git a/src/bin/pg_resetwal/t/003_advance.pl b/src/bin/pg_resetwal/t/003_advance.pl new file mode 100644 index 00000000000..a28e994b10c --- /dev/null +++ b/src/bin/pg_resetwal/t/003_advance.pl @@ -0,0 +1,137 @@ +use strict; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Basename; + +# +# Check whether we can set arbitrarily large values for m,o,x options +# + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init(); +$node->start(); + +my $data_dir = $node->data_dir; + +# Run the regression tests +sub run_regression +{ + my $dlpath = dirname($ENV{REGRESS_SHLIB}); + my $pgregress = $ENV{PG_REGRESS}; + my $outputdir = $PostgreSQL::Test::Utils::tmp_check; + + my $rc = + system($ENV{PG_REGRESS} + . " " + . "--dlpath=\"$dlpath\" " + . "--bindir= " + . "--host=" + . $node->host . " " + . "--port=" + . $node->port . " " + . "--schedule=$dlpath/parallel_schedule " + . "--max-concurrent-tests=20 " + . "--inputdir=\"$dlpath\" " + . 
"--outputdir=\"$outputdir\""); + if ($rc != 0) + { + # Dump out the regression diffs file, if there is one + my $diffs = "$outputdir/regression.diffs"; + if (-e $diffs) + { + print "=== dumping $diffs ===\n"; + print slurp_file($diffs); + print "=== EOF ===\n"; + } + } + is($rc, 0, 'regression tests pass'); +} + +# +# Test -x option +# + +$node->safe_psql('postgres', q( + CREATE TABLE test ( + int_data INT + ); + INSERT INTO test SELECT generate_series(1, 1000); + BEGIN; + DROP TABLE test; + ABORT; +)); + +my $last_xid = $node->safe_psql('postgres', q( SELECT txid_current(); )); + +# Advance next xid so that it doesn't fit on existing slru segment +my $next_xid = 0; +for (my $count = 1; $count < 20; $count++) +{ + $next_xid += 500_000_000; + + $node->stop(); + system_or_bail("pg_resetwal -D $data_dir -x $next_xid"); + $node->start(); + + $node->safe_psql('postgres', q( + VACUUM FREEZE; + )); +} + +# Check whether postgres recognized statuses of all previous transactions +# correctly +my $tuples_num = $node->safe_psql('postgres', q( + SELECT COUNT(*) FROM test; +)); +ok($tuples_num == 1000, "we can see table 'test' and all tuples in it"); + +# +# Test -o option +# + +my $next_oid = 100_000; + +$node->stop(); +system_or_bail("pg_resetwal -D $data_dir -o $next_oid"); +$node->start(); + +$node->safe_psql('postgres', q( + CREATE TABLE test1 ( + int_data INT + ); +)); + +my $advanced_oid = $node->safe_psql('postgres', q( + SELECT oid FROM pg_class WHERE relname = 'test1'; +)); +ok($advanced_oid >= $next_oid, "oid was advanced succesfully"); + +# +# Test -m option +# + +# Advance next multi xid so that it doesn't fit on existing slru segment +my $next_mxid = 4_000_000; +my $oldest_mxid = 100; + +$node->stop(); +system_or_bail("pg_resetwal -D $data_dir -m $next_mxid,$oldest_mxid"); +$node->start(); + +# Check whether all works properly +$node->safe_psql('postgres', q( + CREATE TABLE test2 ( + int_data INT + ); + INSERT INTO test2 SELECT generate_series(1, 1000); +)); + +# 
# +# Run regression tests to make sure that postgres is working normally after all +# manipulations +# +run_regression(); + +$node->stop(); +done_testing(); -- 2.43.0