From 7fc3097442f7bfcb3c4baa890b02bed433f7320d Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Wed, 25 Jun 2025 14:34:06 +0500
Subject: [PATCH v5] Make next multixact sleep timed with a recovery conflict
 check

---
 contrib/amcheck/t/006_MultiXact_standby.pl | 121 +++++++++++++++++++++
 src/backend/access/transam/multixact.c     |  17 ++-
 src/backend/storage/ipc/standby.c          |   1 +
 src/backend/tcop/postgres.c                |   8 +-
 src/test/perl/PostgreSQL/Test/Cluster.pm   |  48 ++++++++
 5 files changed, 191 insertions(+), 4 deletions(-)
 create mode 100644 contrib/amcheck/t/006_MultiXact_standby.pl

diff --git a/contrib/amcheck/t/006_MultiXact_standby.pl b/contrib/amcheck/t/006_MultiXact_standby.pl
new file mode 100644
index 00000000000..2cdd0d93be3
--- /dev/null
+++ b/contrib/amcheck/t/006_MultiXact_standby.pl
@@ -0,0 +1,121 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Minimal test testing multixacts with streaming replication
+use strict;
+use warnings;
+use Config;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More tests => 4;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+# A specific role is created for replication purposes
+$node_primary->init(
+	allows_streaming => 1,
+	auth_extra       => [ '--create-role', 'repl_role' ]);
+$node_primary->append_conf('postgresql.conf', 'lock_timeout = 180000');
+$node_primary->append_conf('postgresql.conf', 'max_connections = 500');
+$node_primary->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1');
+$node_standby_1->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby_1->start;
+
+# Create some content on primary and check its presence in standby nodes
+$node_primary->safe_psql('postgres', q(create table tbl2 (
+    id int primary key,
+    val int
+);
+insert into tbl2 select i, 0 from generate_series(1,100000) i;
+));
+
+# Wait for standbys to catch up
+my $primary_lsn = $node_primary->lsn('write');
+$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
+
+#
+# Stress CIC with pgbench
+#
+
+# Run background pgbench with bt_index_check on standby
+my $pgbench_out   = '';
+my $pgbench_timer = IPC::Run::timeout(180);
+my $pgbench_h     = $node_standby_1->background_pgbench(
+	'--no-vacuum --report-per-command -M prepared -c 10 -j 2 -T 10',
+	{
+		'006_pgbench_standby_check_1' => q(
+			begin;
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			select sum(val) from tbl2;
+			\sleep 10 ms
+			commit;
+		   )
+	},
+	\$pgbench_out,
+	$pgbench_timer);
+
+# Run pgbench with data data manipulations and REINDEX on primary.
+# pgbench might try to launch more than one instance of the RIC
+# transaction concurrently.  That would deadlock, so use an advisory
+# lock to ensure only one CIC runs at a time.
+$node_primary->pgbench(
+	'--no-vacuum --report-per-command -M prepared -c 10 -j 2 -T 10',
+	0,
+	[qr{actually processed}],
+	[qr{^$}],
+	'concurrent updates',
+	{
+		'004_pgbench_updates' => q(
+			\set id random(1, 10000)
+			begin;
+			select * from tbl2 where id = :id for no key update;
+			\sleep 10 ms
+			savepoint s1;
+			update tbl2 set val = val+1 where id = :id;
+			\sleep 10 ms
+			commit;
+		  )
+	});
+
+$pgbench_h->pump_nb;
+$pgbench_h->finish();
+my $result =
+    ($Config{osname} eq "MSWin32")
+  ? ($pgbench_h->full_results)[0]
+  : $pgbench_h->result(0);
+is($result, 0, "pgbench with bt_index_check() on standby works");
+
+
+# Check that no deadlock occured
+$primary_lsn = $node_primary->lsn('write');
+$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
+
+# done
+$node_primary->stop;
+$node_standby_1->stop;
+done_testing();
\ No newline at end of file
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index b7b47ef076a..8b4707a60bf 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -883,6 +883,9 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 	XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
 	XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
 
+	if (rand()%2 == 0)
+		pg_usleep(1000);
+
 	(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
 
 	/* Now enter the information into the OFFSETs and MEMBERs logs */
@@ -1262,6 +1265,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	return result;
 }
 
+void CheckReoveryInterrupts(void);
+
 /*
  * GetMultiXactIdMembers
  *		Return the set of MultiXactMembers that make up a MultiXactId
@@ -1478,10 +1483,16 @@ retry:
 		{
 			/* Corner case 2: next multixact is still being filled in */
 			LWLockRelease(lock);
-			CHECK_FOR_INTERRUPTS();
 
-			ConditionVariableSleep(&MultiXactState->nextoff_cv,
-								   WAIT_EVENT_MULTIXACT_CREATION);
+			if (ConditionVariableTimedSleep(&MultiXactState->nextoff_cv, 1,
+								   WAIT_EVENT_MULTIXACT_CREATION))
+			{
+				if (RecoveryInProgress() && !InRecovery)
+				{
+					CheckReoveryInterrupts();
+					CheckRecoveryConflictDeadlock();
+				}
+			}
 			slept = true;
 			goto retry;
 		}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 872679ca447..8da38429996 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -390,6 +390,7 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
 				 */
 				Assert(VirtualTransactionIdIsValid(*waitlist));
 				pid = CancelVirtualTransaction(*waitlist, reason);
+				elog(WARNING, "Cancelling pid %d", pid);
 
 				/*
 				 * Wait a little bit for it to die so that we avoid flooding
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 9cd1d0abe35..cccf4fec033 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -3237,7 +3237,7 @@ ProcessRecoveryConflictInterrupts(void)
 	 * us.
 	 */
 	Assert(!proc_exit_inprogress);
-	Assert(InterruptHoldoffCount == 0);
+	//Assert(InterruptHoldoffCount == 0);
 	Assert(RecoveryConflictPending);
 
 	RecoveryConflictPending = false;
@@ -3254,6 +3254,12 @@ ProcessRecoveryConflictInterrupts(void)
 	}
 }
 
+void CheckReoveryInterrupts(void)
+{
+	if (RecoveryConflictPending)
+		ProcessRecoveryConflictInterrupts();
+}
+
 /*
  * ProcessInterrupts: out-of-line portion of CHECK_FOR_INTERRUPTS() macro
  *
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index f2d9afd398f..0cbf3b69628 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -2387,6 +2387,54 @@ sub pgbench
 
 =pod
 
+=item $node->background_pgbench($opts, $files, \$stdout, $timer) => harness
+
+Invoke B<pgbench> and return an IPC::Run harness object.  The process's stdin
+is empty, and its stdout and stderr go to the $stdout scalar reference.  This
+allows the caller to act on other parts of the system while B<pgbench> is
+running.  Errors from B<pgbench> are the caller's problem.
+
+The specified timer object is attached to the harness, as well.  It's caller's
+responsibility to select the timeout length, and to restart the timer after
+each command if the timeout is per-command.
+
+Be sure to "finish" the harness when done with it.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=back
+
+=cut
+
+sub background_pgbench
+{
+	my ($self, $opts, $files, $stdout, $timer) = @_;
+
+	my @cmd =
+	  ('pgbench', split(/\s+/, $opts), $self->_pgbench_make_files($files));
+
+	local %ENV = $self->_get_env();
+
+	my $stdin = "";
+	# IPC::Run would otherwise append to existing contents:
+	$$stdout = "" if ref($stdout);
+
+	my $harness = IPC::Run::start \@cmd, '<', \$stdin, '>', $stdout, '2>&1',
+	  $timer;
+
+	return $harness;
+}
+
+=pod
+
 =item $node->connect_ok($connstr, $test_name, %params)
 
 Attempt a connection with a custom connection string.  This is expected
-- 
2.39.5 (Apple Git-154)

