From d45b516bea3f80c5e9bf1fcd9fd2004de52d872e Mon Sep 17 00:00:00 2001
From: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Date: Mon, 16 Feb 2026 11:58:11 +0900
Subject: [PATCH v4] Stabilize 009_twophase.pl

009_twophase.pl performs a switchover several times, but sometimes the old
primary could not follow the new primary, producing a log like the one below:

```
LOG: fetching timeline history file for timeline 2 from primary server
LOG:  started streaming WAL from primary at 0/03000000 on timeline 1
LOG: replication terminated by primary server
DETAIL:  End of WAL reached on timeline 1 at 0/03022070.
LOG:  new timeline 2 forked off current database system timeline  1 before current recovery point 0/030220B8
LOG: restarted WAL streaming at 0/03000000 on timeline 1
LOG: replication terminated by primary server
```

This issue could occur for two reasons.

1) An old primary shuts down before all changes are replicated.
2) A background writer on the old primary generated the RUNNING_XACTS record,
and the node shut down before sending it.

This commit addresses both. Regarding the first issue, wait_for_replay_catchup()
has been added to ensure that all changes are replicated.
As for the second issue, the injection_points extension is used to suppress the
generation of RUNNING_XACTS records. The test is therefore skipped when
injection points are not available, since random failures could still occur there.

Author: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Reviewed-by: Alexander Lakhin <exclusion@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
---
 src/test/recovery/t/009_twophase.pl | 54 +++++++++++++++++++++++++++--
 1 file changed, 52 insertions(+), 2 deletions(-)

diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
index 879e493b5b8..6ec72370c70 100644
--- a/src/test/recovery/t/009_twophase.pl
+++ b/src/test/recovery/t/009_twophase.pl
@@ -12,6 +12,16 @@ use Test::More;
 my $psql_out = '';
 my $psql_rc = '';
 
+# This test needs an injection point to avoid generating xl_running_xacts
+# records. The test performs a switchover several times, and if such a record
+# is generated on the old primary around a switchover, it might exist only on
+# the old timeline of the WAL. In that case, the old primary cannot start
+# following the new primary.
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
 sub configure_and_reload
 {
 	local $Test::Builder::Level = $Test::Builder::Level + 1;
@@ -30,24 +40,35 @@ sub configure_and_reload
 
 # Set up two nodes, which will alternately be primary and replication standby.
 
-# Setup london node
+# Setup london node.
+#
+# Here, we avoid both checkpoints and autovacuum during the test. Otherwise,
+# WAL records could be generated around a switchover, and the old primary
+# could not start following the new primary.
 my $node_london = PostgreSQL::Test::Cluster->new("london");
 $node_london->init(allows_streaming => 1);
 $node_london->append_conf(
 	'postgresql.conf', qq(
 	max_prepared_transactions = 10
 	log_checkpoints = true
+	autovacuum = off
+	checkpoint_timeout = 1h
 ));
 $node_london->start;
 $node_london->backup('london_backup');
 
-# Setup paris node
+# Setup paris node.
+#
+# Avoid both checkpoints and autovacuum to stabilize the test. See the comments
+# atop the setup of the london node.
 my $node_paris = PostgreSQL::Test::Cluster->new('paris');
 $node_paris->init_from_backup($node_london, 'london_backup',
 	has_streaming => 1);
 $node_paris->append_conf(
 	'postgresql.conf', qq(
 	subtransaction_buffers = 32
+	autovacuum = off
+	checkpoint_timeout = 1h
 ));
 $node_paris->start;
 
@@ -63,6 +84,16 @@ my $cur_primary_name = $cur_primary->name;
 # Create table we'll use in the test transactions
 $cur_primary->psql('postgres', "CREATE TABLE t_009_tbl (id int, msg text)");
 
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$cur_primary->check_extension('injection_points'))
+{
+	plan skip_all => 'Extension injection_points not installed';
+}
+
+$cur_primary->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
 ###############################################################################
 # Check that we can commit and abort transaction after soft restart.
 # Here checkpoint happens before shutdown and no WAL replay will occur at next
@@ -158,6 +189,12 @@ $cur_primary->psql(
 	COMMIT PREPARED 'xact_009_6';");
 $cur_primary->teardown_node;
 $cur_primary->start;
+
+# Attach an injection point to avoid seeing xl_running_xacts records. See
+# comments at the beginning of the file.
+$cur_primary->safe_psql('postgres',
+	"SELECT injection_points_attach('skip-log-running-xacts', 'error');");
+
 $psql_rc = $cur_primary->psql(
 	'postgres', "
 	BEGIN;
@@ -222,6 +259,7 @@ $cur_primary->psql(
 	SAVEPOINT s1;
 	INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_primary_name}');
 	PREPARE TRANSACTION 'xact_009_10';");
+$cur_primary->wait_for_replay_catchup($cur_standby);
 $cur_primary->teardown_node;
 $cur_standby->promote;
 
@@ -230,6 +268,11 @@ note "Now paris is primary and london is standby";
 ($cur_primary, $cur_standby) = ($node_paris, $node_london);
 $cur_primary_name = $cur_primary->name;
 
+# Attach an injection point to avoid seeing xl_running_xacts records. See
+# comments at the beginning of the file.
+$cur_primary->safe_psql('postgres',
+	"SELECT injection_points_attach('skip-log-running-xacts', 'error');");
+
 # because london is not running at this point, we can't use syncrep commit
 # on this command
 $psql_rc = $cur_primary->psql('postgres',
@@ -254,6 +297,7 @@ $cur_primary->psql(
 	SAVEPOINT s1;
 	INSERT INTO t_009_tbl VALUES (24, 'issued to ${cur_primary_name}');
 	PREPARE TRANSACTION 'xact_009_11';");
+$cur_primary->wait_for_replay_catchup($cur_standby);
 $cur_primary->stop;
 $cur_standby->restart;
 $cur_standby->promote;
@@ -263,6 +307,11 @@ note "Now london is primary and paris is standby";
 ($cur_primary, $cur_standby) = ($node_london, $node_paris);
 $cur_primary_name = $cur_primary->name;
 
+# Attach an injection point to avoid seeing xl_running_xacts records. See
+# comments at the beginning of the file.
+$cur_primary->safe_psql('postgres',
+	"SELECT injection_points_attach('skip-log-running-xacts', 'error');");
+
 $cur_primary->psql(
 	'postgres',
 	"SELECT count(*) FROM pg_prepared_xacts",
@@ -289,6 +338,7 @@ $cur_primary->psql(
 	INSERT INTO t_009_tbl VALUES (26, 'issued to ${cur_primary_name}');
 	PREPARE TRANSACTION 'xact_009_12';
 	");
+$cur_primary->wait_for_replay_catchup($cur_standby);
 $cur_primary->stop;
 $cur_standby->teardown_node;
 $cur_standby->start;
-- 
2.47.3

