Hi!

Sorry for delay. I was a bit busy last month. Anyway, here is my
proposal for making multioffsets 64 bit.
The patch set consists of three parts:
0001 - making user output of offsets 64-bit ready;
0002 - making offsets 64-bit;
0003 - provide 32 to 64 bit conversion in pg_upgarde.

I'm pretty sure this is just a beginning of the conversation, so any
opinions and reviews, as always, are very welcome!

-- 
Best regards,
Maxim Orlov.
From 2e1f05b3b0504153e57188e968bb19cb6741c087 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.orlov@postgrespro.ru>
Date: Wed, 6 Mar 2024 11:11:33 +0300
Subject: [PATCH v1 2/3] Use 64-bit multixact offsets.

Author: Maxim Orlov <orlovmg@gmail.com>
---
 src/backend/access/transam/multixact.c | 182 ++-----------------------
 src/bin/pg_resetwal/pg_resetwal.c      |   2 +-
 src/bin/pg_resetwal/t/001_basic.pl     |   2 +-
 src/include/access/multixact.h         |   2 +-
 src/include/c.h                        |   2 +-
 5 files changed, 16 insertions(+), 174 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 57c5148933..f2a2aa9547 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -95,14 +95,6 @@
 /*
  * Defines for MultiXactOffset page sizes.  A page is the same BLCKSZ as is
  * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
  */
 
 /* We need four bytes per offset */
@@ -174,7 +166,7 @@ MXOffsetToMemberPage(MultiXactOffset offset)
 	return offset / MULTIXACT_MEMBERS_PER_PAGE;
 }
 
-static inline int
+static inline int64
 MXOffsetToMemberSegment(MultiXactOffset offset)
 {
 	return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
@@ -271,9 +263,6 @@ typedef struct MultiXactStateData
 	MultiXactId multiStopLimit;
 	MultiXactId multiWrapLimit;
 
-	/* support for members anti-wraparound measures */
-	MultiXactOffset offsetStopLimit;	/* known if oldestOffsetKnown */
-
 	/*
 	 * This is used to sleep until a multixact offset is written when we want
 	 * to create the next one.
@@ -408,8 +397,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
 									MultiXactOffset offset2);
 static void ExtendMultiXactOffset(MultiXactId multi);
 static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
-									 MultiXactOffset start, uint32 distance);
 static bool SetOffsetVacuumLimit(bool is_startup);
 static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
 static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
@@ -1158,78 +1145,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	else
 		*offset = nextOffset;
 
-	/*----------
-	 * Protect against overrun of the members space as well, with the
-	 * following rules:
-	 *
-	 * If we're past offsetStopLimit, refuse to generate more multis.
-	 * If we're close to offsetStopLimit, emit a warning.
-	 *
-	 * Arbitrarily, we start emitting warnings when we're 20 segments or less
-	 * from offsetStopLimit.
-	 *
-	 * Note we haven't updated the shared state yet, so if we fail at this
-	 * point, the multixact ID we grabbed can still be used by the next guy.
-	 *
-	 * Note that there is no point in forcing autovacuum runs here: the
-	 * multixact freeze settings would have to be reduced for that to have any
-	 * effect.
-	 *----------
-	 */
-#define OFFSET_WARN_SEGMENTS	20
-	if (MultiXactState->oldestOffsetKnown &&
-		MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
-								 nmembers))
-	{
-		/* see comment in the corresponding offsets wraparound case */
-		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
-		ereport(ERROR,
-				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("multixact \"members\" limit exceeded"),
-				 errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
-								  "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
-								  MultiXactState->offsetStopLimit - nextOffset - 1,
-								  nmembers,
-								  MultiXactState->offsetStopLimit - nextOffset - 1),
-				 errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
-						 MultiXactState->oldestMultiXactDB)));
-	}
-
-	/*
-	 * Check whether we should kick autovacuum into action, to prevent members
-	 * wraparound. NB we use a much larger window to trigger autovacuum than
-	 * just the warning limit. The warning is just a measure of last resort -
-	 * this is in line with GetNewTransactionId's behaviour.
-	 */
-	if (!MultiXactState->oldestOffsetKnown ||
-		(MultiXactState->nextOffset - MultiXactState->oldestOffset
-		 > MULTIXACT_MEMBER_SAFE_THRESHOLD))
-	{
-		/*
-		 * To avoid swamping the postmaster with signals, we issue the autovac
-		 * request only when crossing a segment boundary. With default
-		 * compilation settings that's roughly after 50k members.  This still
-		 * gives plenty of chances before we get into real trouble.
-		 */
-		if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
-			(MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
-			SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-	}
-
-	if (MultiXactState->oldestOffsetKnown &&
-		MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
-								 nextOffset,
-								 nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
-		ereport(WARNING,
-				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
-							   "database with OID %u must be vacuumed before %d more multixact members are used",
-							   MultiXactState->offsetStopLimit - nextOffset + nmembers,
-							   MultiXactState->oldestMultiXactDB,
-							   MultiXactState->offsetStopLimit - nextOffset + nmembers),
-				 errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
-
 	ExtendMultiXactMember(nextOffset, nmembers);
 
 	/*
@@ -1968,7 +1883,7 @@ MultiXactShmemInit(void)
 				  "pg_multixact/offsets", LWTRANCHE_MULTIXACTOFFSET_BUFFER,
 				  LWTRANCHE_MULTIXACTOFFSET_SLRU,
 				  SYNC_HANDLER_MULTIXACT_OFFSET,
-				  false);
+				  true);
 	SlruPagePrecedesUnitTests(MultiXactOffsetCtl, MULTIXACT_OFFSETS_PER_PAGE);
 	SimpleLruInit(MultiXactMemberCtl,
 				  "multixact_member", multixact_member_buffers, 0,
@@ -2713,8 +2628,6 @@ SetOffsetVacuumLimit(bool is_startup)
 	MultiXactOffset nextOffset;
 	bool		oldestOffsetKnown = false;
 	bool		prevOldestOffsetKnown;
-	MultiXactOffset offsetStopLimit = 0;
-	MultiXactOffset prevOffsetStopLimit;
 
 	/*
 	 * NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2729,7 +2642,6 @@ SetOffsetVacuumLimit(bool is_startup)
 	nextOffset = MultiXactState->nextOffset;
 	prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
 	prevOldestOffset = MultiXactState->oldestOffset;
-	prevOffsetStopLimit = MultiXactState->offsetStopLimit;
 	Assert(MultiXactState->finishedStartup);
 	LWLockRelease(MultiXactGenLock);
 
@@ -2760,11 +2672,7 @@ SetOffsetVacuumLimit(bool is_startup)
 		oldestOffsetKnown =
 			find_multixact_start(oldestMultiXactId, &oldestOffset);
 
-		if (oldestOffsetKnown)
-			ereport(DEBUG1,
-					(errmsg_internal("oldest MultiXactId member is at offset %u",
-									 oldestOffset)));
-		else
+		if (!oldestOffsetKnown)
 			ereport(LOG,
 					(errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
 							oldestMultiXactId)));
@@ -2777,24 +2685,7 @@ SetOffsetVacuumLimit(bool is_startup)
 	 * overrun of old data in the members SLRU area. We can only do so if the
 	 * oldest offset is known though.
 	 */
-	if (oldestOffsetKnown)
-	{
-		/* move back to start of the corresponding segment */
-		offsetStopLimit = oldestOffset - (oldestOffset %
-										  (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
-		/* always leave one segment before the wraparound point */
-		offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
-		if (!prevOldestOffsetKnown && !is_startup)
-			ereport(LOG,
-					(errmsg("MultiXact member wraparound protections are now enabled")));
-
-		ereport(DEBUG1,
-				(errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
-								 offsetStopLimit, oldestMultiXactId)));
-	}
-	else if (prevOldestOffsetKnown)
+	if (prevOldestOffsetKnown)
 	{
 		/*
 		 * If we failed to get the oldest offset this time, but we have a
@@ -2804,14 +2695,12 @@ SetOffsetVacuumLimit(bool is_startup)
 		 */
 		oldestOffset = prevOldestOffset;
 		oldestOffsetKnown = true;
-		offsetStopLimit = prevOffsetStopLimit;
 	}
 
 	/* Install the computed values */
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->oldestOffset = oldestOffset;
 	MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
-	MultiXactState->offsetStopLimit = offsetStopLimit;
 	LWLockRelease(MultiXactGenLock);
 
 	/*
@@ -2821,54 +2710,6 @@ SetOffsetVacuumLimit(bool is_startup)
 		(nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
 }
 
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name.  The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
-						 uint32 distance)
-{
-	MultiXactOffset finish;
-
-	/*
-	 * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
-	 * if the addition wraps around the UINT_MAX boundary, skip that value.
-	 */
-	finish = start + distance;
-	if (finish < start)
-		finish++;
-
-	/*-----------------------------------------------------------------------
-	 * When the boundary is numerically greater than the starting point, any
-	 * value numerically between the two is not wrapped:
-	 *
-	 *	<----S----B---->
-	 *	[---)			 = F wrapped past B (and UINT_MAX)
-	 *		 [---)		 = F not wrapped
-	 *			  [----] = F wrapped past B
-	 *
-	 * When the boundary is numerically less than the starting point (i.e. the
-	 * UINT_MAX wraparound occurs somewhere in between) then all values in
-	 * between are wrapped:
-	 *
-	 *	<----B----S---->
-	 *	[---)			 = F not wrapped past B (but wrapped past UINT_MAX)
-	 *		 [---)		 = F wrapped past B (and UINT_MAX)
-	 *			  [----] = F not wrapped
-	 *-----------------------------------------------------------------------
-	 */
-	if (start < boundary)
-		return finish >= boundary || finish < start;
-	else
-		return finish >= boundary && finish < start;
-}
-
 /*
  * Find the starting offset of the given MultiXactId.
  *
@@ -2990,8 +2831,9 @@ MultiXactMemberFreezeThreshold(void)
 	 * we try to eliminate from the system is based on how far we are past
 	 * MULTIXACT_MEMBER_SAFE_THRESHOLD.
 	 */
-	fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
-		(MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
+	fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD);
+	fraction /= (double) (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
+
 	victim_multixacts = multixacts * fraction;
 
 	/* fraction could be > 1.0, but lowest possible freeze age is zero */
@@ -3041,10 +2883,10 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
 static void
 PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
 {
-	const int	maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
-	int			startsegment = MXOffsetToMemberSegment(oldestOffset);
-	int			endsegment = MXOffsetToMemberSegment(newOldestOffset);
-	int			segment = startsegment;
+	const int64	maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
+	int64		startsegment = MXOffsetToMemberSegment(oldestOffset);
+	int64		endsegment = MXOffsetToMemberSegment(newOldestOffset);
+	int64		segment = startsegment;
 
 	/*
 	 * Delete all the segments but the last one. The last segment can still
@@ -3337,7 +3179,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
 static bool
 MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 {
-	int32		diff = (int32) (offset1 - offset2);
+	int64		diff = (int64) (offset1 - offset2);
 
 	return (diff < 0);
 }
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 985cd06802..1af2ce4b93 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -264,7 +264,7 @@ main(int argc, char *argv[])
 
 			case 'O':
 				errno = 0;
-				set_mxoff = strtoul(optarg, &endptr, 0);
+				set_mxoff = strtou64(optarg, &endptr, 0);
 				if (endptr == optarg || *endptr != '\0' || errno != 0)
 				{
 					pg_log_error("invalid argument for option %s", "-O");
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index 9829e48106..f8a8eef44d 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -206,7 +206,7 @@ push @cmd,
   sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
 
 @files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = 32 * $blcksz / 8;
 # -m argument is "new,old"
 push @cmd, '-m',
   sprintf("%d,%d",
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 7ffd256c74..90583634ec 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -27,7 +27,7 @@
 
 #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
 
-#define MaxMultiXactOffset	((MultiXactOffset) 0xFFFFFFFF)
+#define MaxMultiXactOffset	UINT64CONST(0xFFFFFFFFFFFFFFFF)
 
 /*
  * Possible multixact lock modes ("status").  The first four modes are for
diff --git a/src/include/c.h b/src/include/c.h
index dc1841346c..ccfb82b478 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -661,7 +661,7 @@ typedef uint32 SubTransactionId;
 /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
 typedef TransactionId MultiXactId;
 
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
 
 typedef uint32 CommandId;
 
-- 
2.45.2

From 95226756a225ca6b95e2baafff502034c355310d Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 7 Aug 2024 16:35:22 +0300
Subject: [PATCH v1 1/3] Use 64-bit format output for multixact offsets

Author: Maxim Orlov <orlovmg@gmail.com>
---
 src/backend/access/rmgrdesc/mxactdesc.c   |  9 ++++----
 src/backend/access/rmgrdesc/xlogdesc.c    |  4 ++--
 src/backend/access/transam/multixact.c    | 26 +++++++++++++----------
 src/backend/access/transam/xlogrecovery.c |  5 +++--
 src/bin/pg_controldata/pg_controldata.c   |  4 ++--
 src/bin/pg_resetwal/pg_resetwal.c         |  8 +++----
 6 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c
index 3e8ad4d5ef..1b486de38c 100644
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -65,8 +65,8 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
 		xl_multixact_create *xlrec = (xl_multixact_create *) rec;
 		int			i;
 
-		appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid,
-						 xlrec->moff, xlrec->nmembers);
+		appendStringInfo(buf, "%u offset %llu nmembers %d: ", xlrec->mid,
+						 (unsigned long long) xlrec->moff, xlrec->nmembers);
 		for (i = 0; i < xlrec->nmembers; i++)
 			out_member(buf, &xlrec->members[i]);
 	}
@@ -74,9 +74,10 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
 	{
 		xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec;
 
-		appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)",
+		appendStringInfo(buf, "offsets [%u, %u), members [%llu, %llu)",
 						 xlrec->startTruncOff, xlrec->endTruncOff,
-						 xlrec->startTruncMemb, xlrec->endTruncMemb);
+						 (unsigned long long) xlrec->startTruncMemb,
+						 (unsigned long long) xlrec->endTruncMemb);
 	}
 }
 
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 363294d623..aaa19c81c8 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -66,7 +66,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 		CheckPoint *checkpoint = (CheckPoint *) rec;
 
 		appendStringInfo(buf, "redo %X/%X; "
-						 "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
+						 "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %llu; "
 						 "oldest xid %u in DB %u; oldest multi %u in DB %u; "
 						 "oldest/newest commit timestamp xid: %u/%u; "
 						 "oldest running xid %u; %s",
@@ -79,7 +79,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 						 XidFromFullTransactionId(checkpoint->nextXid),
 						 checkpoint->nextOid,
 						 checkpoint->nextMulti,
-						 checkpoint->nextMultiOffset,
+						 (unsigned long long) checkpoint->nextMultiOffset,
 						 checkpoint->oldestXid,
 						 checkpoint->oldestXidDB,
 						 checkpoint->oldestMulti,
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index c601ff98a1..57c5148933 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1258,7 +1258,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 
 	LWLockRelease(MultiXactGenLock);
 
-	debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
+	debug_elog4(DEBUG2, "GetNew: returning %u offset %llu", result,
+				(unsigned long long) *offset);
 	return result;
 }
 
@@ -2285,8 +2286,9 @@ MultiXactGetCheckptMulti(bool is_shutdown,
 	LWLockRelease(MultiXactGenLock);
 
 	debug_elog6(DEBUG2,
-				"MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
-				*nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
+				"MultiXact: checkpoint is nextMulti %u, nextOffset %llu, oldestMulti %u in DB %u",
+				*nextMulti, (unsigned long long) *nextMultiOffset, *oldestMulti,
+				*oldestMultiDB);
 }
 
 /*
@@ -2320,8 +2322,8 @@ void
 MultiXactSetNextMXact(MultiXactId nextMulti,
 					  MultiXactOffset nextMultiOffset)
 {
-	debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
-				nextMulti, nextMultiOffset);
+	debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %llu",
+				nextMulti, (unsigned long long) nextMultiOffset);
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->nextMXact = nextMulti;
 	MultiXactState->nextOffset = nextMultiOffset;
@@ -2511,8 +2513,8 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
 	}
 	if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
 	{
-		debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
-					minMultiOffset);
+		debug_elog3(DEBUG2, "MultiXact: setting next offset to %llu",
+					(unsigned long long) minMultiOffset);
 		MultiXactState->nextOffset = minMultiOffset;
 	}
 	LWLockRelease(MultiXactGenLock);
@@ -3203,11 +3205,12 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 
 	elog(DEBUG1, "performing multixact truncation: "
 		 "offsets [%u, %u), offsets segments [%llx, %llx), "
-		 "members [%u, %u), members segments [%llx, %llx)",
+		 "members [%llu, %llu), members segments [%llx, %llx)",
 		 oldestMulti, newOldestMulti,
 		 (unsigned long long) MultiXactIdToOffsetSegment(oldestMulti),
 		 (unsigned long long) MultiXactIdToOffsetSegment(newOldestMulti),
-		 oldestOffset, newOldestOffset,
+		 (unsigned long long) oldestOffset,
+		 (unsigned long long) newOldestOffset,
 		 (unsigned long long) MXOffsetToMemberSegment(oldestOffset),
 		 (unsigned long long) MXOffsetToMemberSegment(newOldestOffset));
 
@@ -3463,11 +3466,12 @@ multixact_redo(XLogReaderState *record)
 
 		elog(DEBUG1, "replaying multixact truncation: "
 			 "offsets [%u, %u), offsets segments [%llx, %llx), "
-			 "members [%u, %u), members segments [%llx, %llx)",
+			 "members [%llu, %llu), members segments [%llx, %llx)",
 			 xlrec.startTruncOff, xlrec.endTruncOff,
 			 (unsigned long long) MultiXactIdToOffsetSegment(xlrec.startTruncOff),
 			 (unsigned long long) MultiXactIdToOffsetSegment(xlrec.endTruncOff),
-			 xlrec.startTruncMemb, xlrec.endTruncMemb,
+			 (unsigned long long) xlrec.startTruncMemb,
+			 (unsigned long long) xlrec.endTruncMemb,
 			 (unsigned long long) MXOffsetToMemberSegment(xlrec.startTruncMemb),
 			 (unsigned long long) MXOffsetToMemberSegment(xlrec.endTruncMemb));
 
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index ad817fbca6..388037a94b 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -877,8 +877,9 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 							 U64FromFullTransactionId(checkPoint.nextXid),
 							 checkPoint.nextOid)));
 	ereport(DEBUG1,
-			(errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
-							 checkPoint.nextMulti, checkPoint.nextMultiOffset)));
+			(errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %llu",
+							 checkPoint.nextMulti,
+							 (unsigned long long) checkPoint.nextMultiOffset)));
 	ereport(DEBUG1,
 			(errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
 							 checkPoint.oldestXid, checkPoint.oldestXidDB)));
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 93a05d80ca..43b6727570 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -253,8 +253,8 @@ main(int argc, char *argv[])
 		   ControlFile->checkPointCopy.nextOid);
 	printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
 		   ControlFile->checkPointCopy.nextMulti);
-	printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
-		   ControlFile->checkPointCopy.nextMultiOffset);
+	printf(_("Latest checkpoint's NextMultiOffset:  %llu\n"),
+		   (unsigned long long) ControlFile->checkPointCopy.nextMultiOffset);
 	printf(_("Latest checkpoint's oldestXID:        %u\n"),
 		   ControlFile->checkPointCopy.oldestXid);
 	printf(_("Latest checkpoint's oldestXID's DB:   %u\n"),
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index e9dcb5a6d8..985cd06802 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -737,8 +737,8 @@ PrintControlValues(bool guessed)
 		   ControlFile.checkPointCopy.nextOid);
 	printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
 		   ControlFile.checkPointCopy.nextMulti);
-	printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
-		   ControlFile.checkPointCopy.nextMultiOffset);
+	printf(_("Latest checkpoint's NextMultiOffset:  %llu\n"),
+		   (unsigned long long) ControlFile.checkPointCopy.nextMultiOffset);
 	printf(_("Latest checkpoint's oldestXID:        %u\n"),
 		   ControlFile.checkPointCopy.oldestXid);
 	printf(_("Latest checkpoint's oldestXID's DB:   %u\n"),
@@ -809,8 +809,8 @@ PrintNewControlValues(void)
 
 	if (set_mxoff != -1)
 	{
-		printf(_("NextMultiOffset:                      %u\n"),
-			   ControlFile.checkPointCopy.nextMultiOffset);
+		printf(_("NextMultiOffset:                      %llu\n"),
+			   (unsigned long long) ControlFile.checkPointCopy.nextMultiOffset);
 	}
 
 	if (set_oid != 0)
-- 
2.45.2

From 063ec2662d94f7a72e3162702c4051f34cd67000 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <m.orlov@postgrespro.ru>
Date: Tue, 13 Aug 2024 14:44:50 +0300
Subject: [PATCH v1 3/3] Make pg_upgrade convert multixact offsets.

Author: Maxim Orlov <orlovmg@gmail.com>
---
 src/bin/pg_upgrade/Makefile      |   1 +
 src/bin/pg_upgrade/meson.build   |   1 +
 src/bin/pg_upgrade/pg_upgrade.c  |  29 ++-
 src/bin/pg_upgrade/pg_upgrade.h  |  13 +-
 src/bin/pg_upgrade/segresize.c   | 350 +++++++++++++++++++++++++++++++
 src/include/catalog/catversion.h |   2 +-
 6 files changed, 391 insertions(+), 5 deletions(-)
 create mode 100644 src/bin/pg_upgrade/segresize.c

diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index bde91e2beb..030816596f 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -21,6 +21,7 @@ OBJS = \
 	info.o \
 	option.o \
 	parallel.o \
+	segresize.o \
 	pg_upgrade.o \
 	relfilenumber.o \
 	server.o \
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index 9825fa3305..2d9f7e6b65 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -10,6 +10,7 @@ pg_upgrade_sources = files(
   'info.c',
   'option.c',
   'parallel.c',
+  'segresize.c',
   'pg_upgrade.c',
   'relfilenumber.c',
   'server.c',
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 663235816f..d9d8d0ea78 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -750,7 +750,30 @@ copy_xact_xlog_xid(void)
 	if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
 		new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
 	{
-		copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
+		/*
+		 * If the old server is before the MULTIXACTOFFSET_FORMATCHANGE_CAT_VER
+		 * it must have 32-bit multixid offsets, thus it should be converted.
+		 */
+		if (old_cluster.controldata.cat_ver < MULTIXACTOFFSET_FORMATCHANGE_CAT_VER &&
+			new_cluster.controldata.cat_ver >= MULTIXACTOFFSET_FORMATCHANGE_CAT_VER)
+		{
+			uint64	oldest_offset = convert_multixact_offsets();
+
+			if (oldest_offset)
+			{
+				uint64	next_offset = old_cluster.controldata.chkpnt_nxtmxoff;
+
+				/* Handle possible wraparound. */
+				if (next_offset < oldest_offset)
+					next_offset += ((uint64) 1 << 32) - 1;
+
+				next_offset -= oldest_offset - 1;
+				old_cluster.controldata.chkpnt_nxtmxoff = next_offset;
+			}
+		}
+		else
+			copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
+
 		copy_subdir_files("pg_multixact/members", "pg_multixact/members");
 
 		prep_status("Setting next multixact ID and offset for new cluster");
@@ -760,9 +783,9 @@ copy_xact_xlog_xid(void)
 		 * counters here and the oldest multi present on system.
 		 */
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
+				  "\"%s/pg_resetwal\" -O %llu -m %u,%u \"%s\"",
 				  new_cluster.bindir,
-				  old_cluster.controldata.chkpnt_nxtmxoff,
+				  (unsigned long long) old_cluster.controldata.chkpnt_nxtmxoff,
 				  old_cluster.controldata.chkpnt_nxtmulti,
 				  old_cluster.controldata.chkpnt_oldstMulti,
 				  new_cluster.pgdata);
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index cdb6e2b759..37d173cb86 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -114,6 +114,13 @@ extern char *output_files[];
  */
 #define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
 
+/*
+ * Swicth from 32-bit to 64-bit for multixid offsets.
+ *
+ * XXX: should be changed to the actual CATALOG_VERSION_NO on commit.
+ */
+#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202408123
+
 /*
  * large object chunk size added to pg_controldata,
  * commit 5f93c37805e7485488480916b4585e098d3cc883
@@ -230,7 +237,7 @@ typedef struct
 	uint32		chkpnt_nxtepoch;
 	uint32		chkpnt_nxtoid;
 	uint32		chkpnt_nxtmulti;
-	uint32		chkpnt_nxtmxoff;
+	uint64		chkpnt_nxtmxoff;
 	uint32		chkpnt_oldstMulti;
 	uint32		chkpnt_oldstxid;
 	uint32		align;
@@ -494,3 +501,7 @@ void		parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr
 										  char *old_pgdata, char *new_pgdata,
 										  char *old_tablespace);
 bool		reap_child(bool wait_for_child);
+
+/* segresize.c */
+
+uint64		convert_multixact_offsets(void);
diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c
new file mode 100644
index 0000000000..e47c0a2407
--- /dev/null
+++ b/src/bin/pg_upgrade/segresize.c
@@ -0,0 +1,350 @@
+/*
+ *	segresize.c
+ *
+ *	SLRU segment resize utility
+ *
+ *	Copyright (c) 2024, PostgreSQL Global Development Group
+ *	src/bin/pg_upgrade/segresize.c
+ */
+
+#include "postgres_fe.h"
+
+#include "pg_upgrade.h"
+#include "access/multixact.h"
+
+/* See slru.h */
+#define SLRU_PAGES_PER_SEGMENT		32
+
+/*
+ * Some kind of iterator associated with a particular SLRU segment.  The idea is
+ * to specify the segment and page number and then move through the pages.
+ */
+typedef struct SlruSegState
+{
+	char	   *dir;
+	char	   *fn;
+	FILE	   *file;
+	int64		segno;
+	uint64		pageno;
+	bool		leading_gap;
+	bool		long_segment_names;
+} SlruSegState;
+
+/*
+ * Get SLRU segmen file name from state.
+ *
+ * NOTE: this function should mirror SlruFileName call.
+ */
+static inline char *
+SlruFileName(SlruSegState *state)
+{
+	if (state->long_segment_names)
+	{
+		Assert(state->segno >= 0 &&
+			   state->segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
+		return psprintf("%s/%015llX", state->dir, (long long) state->segno);
+	}
+	else
+	{
+		Assert(state->segno >= 0 &&
+			   state->segno <= INT64CONST(0xFFFFFF));
+		return psprintf("%s/%04X", state->dir, (unsigned int) state->segno);
+	}
+}
+
+/*
+ * Create SLRU segment file.
+ */
+static void
+create_segment(SlruSegState *state)
+{
+	Assert(state->fn == NULL);
+	Assert(state->file == NULL);
+
+	state->fn = SlruFileName(state);
+	state->file = fopen(state->fn, "wb");
+	if (!state->file)
+		pg_fatal("could not create file \"%s\": %m", state->fn);
+}
+
+/*
+ * Open existing SLRU segment file.
+ */
+static void
+open_segment(SlruSegState *state)
+{
+	Assert(state->fn == NULL);
+	Assert(state->file == NULL);
+
+	state->fn = SlruFileName(state);
+	state->file = fopen(state->fn, "rb");
+	if (!state->file)
+		pg_fatal("could not open file \"%s\": %m", state->fn);
+}
+
+/*
+ * Close SLRU segment file.
+ */
+static void
+close_segment(SlruSegState *state)
+{
+	if (state->file)
+	{
+		fclose(state->file);
+		state->file = NULL;
+	}
+
+	if (state->fn)
+	{
+		pfree(state->fn);
+		state->fn = NULL;
+	}
+}
+
+/*
+ * Read next page from the old 32-bit offset segment file.
+ */
+static int
+read_old_segment_page(SlruSegState *state, void *buf, bool *empty)
+{
+	int		len;
+
+	/* Open next segment file, if needed. */
+	if (!state->fn)
+	{
+		if (!state->segno)
+			state->leading_gap = true;
+
+		open_segment(state);
+
+		/* Set position to the needed page. */
+		if (state->pageno > 0 &&
+			fseek(state->file, state->pageno * BLCKSZ, SEEK_SET))
+		{
+			close_segment(state);
+		}
+	}
+
+	if (state->file)
+	{
+		/* Segment file do exists, read page from it. */
+		state->leading_gap = false;
+
+		len = fread(buf, sizeof(char), BLCKSZ, state->file);
+
+		/* Are we done or was there an error? */
+		if (len <= 0)
+		{
+			if (ferror(state->file))
+				pg_fatal("error reading file \"%s\": %m", state->fn);
+
+			if (feof(state->file))
+			{
+				*empty = true;
+				len = -1;
+
+				close_segment(state);
+			}
+		}
+		else
+			*empty = false;
+	}
+	else if (!state->leading_gap)
+	{
+		/* We reached the last segment. */
+		len = -1;
+		*empty = true;
+	}
+	else
+	{
+		/* Skip few first segments if they were frozen and removed. */
+		len = BLCKSZ;
+		*empty = true;
+	}
+
+	if (++state->pageno >= SLRU_PAGES_PER_SEGMENT)
+	{
+		/* Start a new segment. */
+		state->segno++;
+		state->pageno = 0;
+
+		close_segment(state);
+	}
+
+	return len;
+}
+
+/*
+ * Write next page to the new 64-bit offset segment file.
+ */
+static void
+write_new_segment_page(SlruSegState *state, void *buf)
+{
+	/*
+	 * Create a new segment file if we still didn't.  Creation is
+	 * postponed until the first non-empty page is found.  This helps
+	 * not to create completely empty segments.
+	 */
+	if (!state->file)
+	{
+		create_segment(state);
+
+		/* Write zeroes to the previously skipped prefix. */
+		if (state->pageno > 0)
+		{
+			char		zerobuf[BLCKSZ] = {0};
+
+			for (int64 i = 0; i < state->pageno; i++)
+			{
+				if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+					pg_fatal("could not write file \"%s\": %m", state->fn);
+			}
+		}
+	}
+
+	/* Write page to the new segment (if it was created). */
+	if (state->file)
+	{
+		if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ)
+			pg_fatal("could not write file \"%s\": %m", state->fn);
+	}
+
+	state->pageno++;
+
+	/*
+	 * Did we reach the maximum page number?  Then close segment file
+	 * and create a new one on the next iteration.
+	 */
+	if (state->pageno >= SLRU_PAGES_PER_SEGMENT)
+	{
+		state->segno++;
+		state->pageno = 0;
+		close_segment(state);
+	}
+}
+
+/*
+ * Convert pg_multixact/offsets segments and return oldest multi offset.
+ */
+uint64
+convert_multixact_offsets(void)
+{
+	/* See multixact.c */
+#define MULTIXACT_OFFSETS_PER_PAGE_OLD	(BLCKSZ / sizeof(uint32))
+#define MULTIXACT_OFFSETS_PER_PAGE		(BLCKSZ / sizeof(MultiXactOffset))
+
+	SlruSegState	oldseg = {0},
+					newseg = {0};
+	uint32			oldbuf[MULTIXACT_OFFSETS_PER_PAGE_OLD] = {0};
+	MultiXactOffset	newbuf[MULTIXACT_OFFSETS_PER_PAGE] = {0};
+	/*
+	 * It is much easier to deal with multi wraparound in 64 bitd format.  Thus
+	 * we use 64 bits for multi-transactions, although they remain 32 bits.
+	 */
+	uint64			oldest_multi = old_cluster.controldata.chkpnt_oldstMulti,
+					next_multi = old_cluster.controldata.chkpnt_nxtmulti,
+					multi,
+					old_entry,
+					new_entry;
+	bool			found = false;
+	uint64			oldest_offset = 0;
+
+	prep_status("Converting pg_multixact/offsets to 64-bit");
+
+	oldseg.pageno = oldest_multi / MULTIXACT_OFFSETS_PER_PAGE_OLD;
+	oldseg.segno = oldseg.pageno / SLRU_PAGES_PER_SEGMENT;
+	oldseg.pageno %= SLRU_PAGES_PER_SEGMENT;
+	oldseg.dir = psprintf("%s/pg_multixact/offsets", old_cluster.pgdata);
+	oldseg.long_segment_names = false;		/* old format XXXX */
+
+	newseg.pageno = oldest_multi / MULTIXACT_OFFSETS_PER_PAGE;
+	newseg.segno = newseg.pageno / SLRU_PAGES_PER_SEGMENT;
+	newseg.pageno %= SLRU_PAGES_PER_SEGMENT;
+	newseg.dir = psprintf("%s/pg_multixact/offsets", new_cluster.pgdata);
+	newseg.long_segment_names = true;
+
+	old_entry = oldest_multi % MULTIXACT_OFFSETS_PER_PAGE_OLD;
+	new_entry = oldest_multi % MULTIXACT_OFFSETS_PER_PAGE;
+
+	if (next_multi < oldest_multi)
+		next_multi += (uint64) 1 << 32;		/* wraparound */
+
+	for (multi = oldest_multi; multi < next_multi; old_entry = 0)
+	{
+		int			oldlen;
+		bool		empty;
+
+		/* Handle possible segment wraparound. */
+		if (oldseg.segno > MaxMultiXactId /
+								MULTIXACT_OFFSETS_PER_PAGE_OLD /
+								SLRU_PAGES_PER_SEGMENT)
+			oldseg.segno = 0;
+
+		/* Read old offset segment. */
+		oldlen = read_old_segment_page(&oldseg, oldbuf, &empty);
+		if (oldlen <= 0 || empty)
+			pg_fatal("cannot read page %llu from file \"%s\": %m",
+					 (unsigned long long) oldseg.pageno, oldseg.fn);
+
+		/* Fill possible gap. */
+		if (oldlen < BLCKSZ)
+			memset((char *) oldbuf + oldlen, 0, BLCKSZ - oldlen);
+
+		/* Save oldest multi offset */
+		if (!found)
+		{
+			oldest_offset = oldbuf[old_entry];
+			found = true;
+		}
+
+		/* ... skip wrapped-around invalid multi */
+		if (multi == (uint64) 1 << 32)
+		{
+			Assert(oldseg.segno == 0);
+			Assert(oldseg.pageno == 1);
+			Assert(old_entry == 0);
+
+			multi += FirstMultiXactId;
+			old_entry = FirstMultiXactId;
+		}
+
+		/* Copy entries to the new page. */
+		for (; multi < next_multi && old_entry < MULTIXACT_OFFSETS_PER_PAGE_OLD;
+			 multi++, old_entry++)
+		{
+			MultiXactOffset offset = oldbuf[old_entry];
+
+			/* Handle possible offset wraparound. */
+			if (offset < oldest_offset)
+				offset += ((uint64) 1 << 32) - 1;
+
+			/* Subtract oldest_offset, so new offsets will start from 1. */
+			newbuf[new_entry++] = offset - oldest_offset + 1;
+			if (new_entry >= MULTIXACT_OFFSETS_PER_PAGE)
+			{
+				/* Write a new page. */
+				write_new_segment_page(&newseg, newbuf);
+				new_entry = 0;
+			}
+		}
+	}
+
+	/* Write the last incomplete page. */
+	if (new_entry > 0 || oldest_multi == next_multi)
+	{
+		memset(&newbuf[new_entry], 0,
+			   sizeof(newbuf[0]) * (MULTIXACT_OFFSETS_PER_PAGE - new_entry));
+		write_new_segment_page(&newseg, newbuf);
+	}
+
+	/* Release resources. */
+	close_segment(&oldseg);
+	close_segment(&newseg);
+
+	pfree(oldseg.dir);
+	pfree(newseg.dir);
+
+	check_ok();
+
+	return oldest_offset;
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 9a0ae27823..f29dc9fc92 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202408122
+#define CATALOG_VERSION_NO	202408123
 
 #endif
-- 
2.45.2

Reply via email to