Hi,

On 1/18/23 10:59 AM, Alvaro Herrera wrote:
On 2023-Jan-18, Drouvot, Bertrand wrote:

The current calls are done that way:

wait_for_replay_catchup called:
- 8 times with write LSN as an argument
- 1 time with insert LSN as an argument
- 16 times with flush LSN as an argument

So it looks like that providing a node as a second argument
would not help for the wait_for_replay_catchup() case.

... unless we changed the calls that wait for reply that use write or
insert so that they use flush instead.

That's a good idea, thanks! Please find attached V2 doing so.

Surely everything should still
work, right?

Right.

Flushing would still occur, either right after the write
(as the transaction commits) or ~200ms afterwards when WAL writer
catches up to that point.

I suppose this may fail to be true if there is some test that is
specifically testing whether writing WAL without flushing works, which
should rare enough, but if it does exist,

I don't see this kind of test.

Please note that V2 does not contain wait_for_flush_catchup() and
wait_for_sent_catchup() anymore as: 1) they are not used yet
and 2) it lets to their author (if any) decide the node->lsn() mode to be used.

This is also mentioned as a comment in V2.

Regards,

--
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
diff --git a/src/bin/pg_rewind/t/007_standby_source.pl 
b/src/bin/pg_rewind/t/007_standby_source.pl
index 52644c2c0d..7236a3b177 100644
--- a/src/bin/pg_rewind/t/007_standby_source.pl
+++ b/src/bin/pg_rewind/t/007_standby_source.pl
@@ -74,9 +74,8 @@ $node_a->safe_psql('postgres',
        "INSERT INTO tbl1 values ('in A, before promotion')");
 $node_a->safe_psql('postgres', 'CHECKPOINT');
 
-my $lsn = $node_a->lsn('write');
-$node_a->wait_for_catchup('node_b', 'write', $lsn);
-$node_b->wait_for_catchup('node_c', 'write', $lsn);
+$node_a->wait_for_write_catchup('node_b', $node_a);
+$node_b->wait_for_write_catchup('node_c', $node_a);
 
 # Promote C
 #
@@ -160,7 +159,7 @@ in A, after C was promoted
 $node_a->safe_psql('postgres',
        "INSERT INTO tbl1 values ('in A, after rewind')");
 
-$node_b->wait_for_catchup('node_c', 'replay', $node_a->lsn('write'));
+$node_b->wait_for_replay_catchup('node_c', $node_a);
 
 check_query(
        'SELECT * FROM tbl1',
diff --git a/src/test/modules/brin/t/02_wal_consistency.pl 
b/src/test/modules/brin/t/02_wal_consistency.pl
index 5983ef208e..7211ba8d8d 100644
--- a/src/test/modules/brin/t/02_wal_consistency.pl
+++ b/src/test/modules/brin/t/02_wal_consistency.pl
@@ -70,6 +70,6 @@ my ($ret, $out, $err) = $whiskey->psql(
        });
 cmp_ok($out, '>=', 1);
 
-$whiskey->wait_for_catchup($charlie, 'replay', $whiskey->lsn('insert'));
+$whiskey->wait_for_replay_catchup($charlie, $whiskey);
 
 done_testing();
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm 
b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 04921ca3a3..cea9796c0c 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -2709,6 +2709,44 @@ sub wait_for_catchup
        return;
 }
 
+# Now defining helper functions wait_for_replay_catchup() and
+# wait_for_write_catchup().
+# Please note that wait_for_flush_catchup() and wait_for_sent_catchup() are not
+# defined as: 1) they are not used yet and 2) it lets their author (if any)
+# decide the node->lsn() mode to be used.
+
+=pod
+
+=item $node->wait_for_replay_catchup(standby_name, node)
+
+Helper function for wait_for_catchup when waiting for the replay_lsn column
+to catchup.
+
+=cut
+
+sub wait_for_replay_catchup
+{
+       my ($self, $standby_name, $node) = @_;
+
+       $self->wait_for_catchup($standby_name, 'replay', $node->lsn('flush'));
+}
+
+=pod
+
+=item $node->wait_for_write_catchup(standby_name, node)
+
+Helper function for wait_for_catchup when waiting for the write_lsn column
+to catchup.
+
+=cut
+
+sub wait_for_write_catchup
+{
+       my ($self, $standby_name, $node) = @_;
+
+       $self->wait_for_catchup($standby_name, 'write', $node->lsn('write'));
+}
+
 =pod
 
 =item $node->wait_for_slot_catchup(slot_name, mode, target_lsn)
diff --git a/src/test/recovery/t/001_stream_rep.pl 
b/src/test/recovery/t/001_stream_rep.pl
index 23a90dd85b..5b15a20d54 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -47,9 +47,8 @@ $node_primary->safe_psql('postgres',
        "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a");
 
 # Wait for standbys to catch up
-my $primary_lsn = $node_primary->lsn('write');
-$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
-$node_standby_1->wait_for_catchup($node_standby_2, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby_1, $node_primary);
+$node_standby_1->wait_for_replay_catchup($node_standby_2, $node_primary);
 
 my $result =
   $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int");
@@ -66,9 +65,8 @@ $node_primary->safe_psql('postgres',
        "CREATE SEQUENCE seq1; SELECT nextval('seq1')");
 
 # Wait for standbys to catch up
-$primary_lsn = $node_primary->lsn('write');
-$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
-$node_standby_1->wait_for_catchup($node_standby_2, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby_1, $node_primary);
+$node_standby_1->wait_for_replay_catchup($node_standby_2, $node_primary);
 
 $result = $node_standby_1->safe_psql('postgres', "SELECT * FROM seq1");
 print "standby 1: $result\n";
@@ -372,10 +370,8 @@ sub replay_check
        my $newval = $node_primary->safe_psql('postgres',
                'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS 
newval FROM replayed RETURNING val'
        );
-       my $primary_lsn = $node_primary->lsn('write');
-       $node_primary->wait_for_catchup($node_standby_1, 'replay', 
$primary_lsn);
-       $node_standby_1->wait_for_catchup($node_standby_2, 'replay',
-               $primary_lsn);
+       $node_primary->wait_for_replay_catchup($node_standby_1, $node_primary);
+       $node_standby_1->wait_for_replay_catchup($node_standby_2, 
$node_primary);
 
        $node_standby_1->safe_psql('postgres',
                qq[SELECT 1 FROM replayed WHERE val = $newval])
diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl 
b/src/test/recovery/t/010_logical_decoding_timelines.pl
index eb1a3b6ef8..57b32a70a7 100644
--- a/src/test/recovery/t/010_logical_decoding_timelines.pl
+++ b/src/test/recovery/t/010_logical_decoding_timelines.pl
@@ -135,7 +135,7 @@ cmp_ok(
        'xmin on physical slot must not be lower than catalog_xmin');
 
 $node_primary->safe_psql('postgres', 'CHECKPOINT');
-$node_primary->wait_for_catchup($node_replica, 'write');
+$node_primary->wait_for_write_catchup($node_replica, $node_primary);
 
 # Boom, crash
 $node_primary->stop('immediate');
diff --git a/src/test/recovery/t/027_stream_regress.pl 
b/src/test/recovery/t/027_stream_regress.pl
index 69d6ddf281..303cb42c13 100644
--- a/src/test/recovery/t/027_stream_regress.pl
+++ b/src/test/recovery/t/027_stream_regress.pl
@@ -86,8 +86,7 @@ $node_primary->psql('regression',
        "select setval(seqrelid, nextval(seqrelid)) from pg_sequence");
 
 # Wait for standby to catch up
-$node_primary->wait_for_catchup($node_standby_1, 'replay',
-       $node_primary->lsn('insert'));
+$node_primary->wait_for_replay_catchup($node_standby_1, $node_primary);
 
 # Perform a logical dump of primary and standby, and check that they match
 command_ok(
diff --git a/src/test/recovery/t/030_stats_cleanup_replica.pl 
b/src/test/recovery/t/030_stats_cleanup_replica.pl
index f1121e4b12..7c3fa475e5 100644
--- a/src/test/recovery/t/030_stats_cleanup_replica.pl
+++ b/src/test/recovery/t/030_stats_cleanup_replica.pl
@@ -38,8 +38,7 @@ drop_table_by_oid('postgres', $tableoid);
 drop_function_by_oid('postgres', $funcoid);
 
 $sect = 'post drop';
-my $primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 test_standby_func_tab_stats_status('postgres',
        $dboid, $tableoid, $funcoid, 'f');
 
@@ -49,8 +48,7 @@ test_standby_func_tab_stats_status('postgres',
 $sect = "schema creation";
 
 $node_primary->safe_psql('postgres', "CREATE SCHEMA drop_schema_test1");
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 ($dboid, $tableoid, $funcoid) =
   populate_standby_stats('postgres', 'drop_schema_test1');
@@ -61,8 +59,7 @@ $node_primary->safe_psql('postgres', "DROP SCHEMA 
drop_schema_test1 CASCADE");
 
 $sect = "post schema drop";
 
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 # verify table and function stats removed from standby
 test_standby_func_tab_stats_status('postgres',
@@ -74,8 +71,7 @@ test_standby_func_tab_stats_status('postgres',
 $sect = "createdb";
 
 $node_primary->safe_psql('postgres', "CREATE DATABASE test");
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 ($dboid, $tableoid, $funcoid) = populate_standby_stats('test', 'public');
 
@@ -85,8 +81,7 @@ test_standby_db_stats_status('test', $dboid, 't');
 
 $node_primary->safe_psql('postgres', "DROP DATABASE test");
 $sect        = "post dropdb";
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 # Test that the stats were cleaned up on standby
 # Note that this connects to 'postgres' but provides the dboid of dropped db
@@ -137,8 +132,7 @@ sub populate_standby_stats
        $node_primary->safe_psql($connect_db,
                "CREATE FUNCTION $schema.drop_func_test1() RETURNS VOID AS 
'select 2;' LANGUAGE SQL IMMUTABLE"
        );
-       my $primary_lsn = $node_primary->lsn('flush');
-       $node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+       $node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
        # collect object oids
        my $dboid = $node_standby->safe_psql($connect_db,
diff --git a/src/test/recovery/t/031_recovery_conflict.pl 
b/src/test/recovery/t/031_recovery_conflict.pl
index 875afb8e3c..a5752854a7 100644
--- a/src/test/recovery/t/031_recovery_conflict.pl
+++ b/src/test/recovery/t/031_recovery_conflict.pl
@@ -63,8 +63,7 @@ CREATE TABLE ${table1}(a int, b int);
 INSERT INTO $table1 SELECT i % 3, 0 FROM generate_series(1,20) i;
 CREATE TABLE ${table2}(a int, b int);
 ]);
-my $primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 
 # a longrunning psql that we can use to trigger conflicts
@@ -97,8 +96,7 @@ $node_primary->safe_psql(
        BEGIN; LOCK $table1; COMMIT;
        ]);
 
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 my $cursor1 = "test_recovery_conflict_cursor";
 
@@ -124,8 +122,7 @@ $node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
 # finished, so waiting for catchup ensures that there is no race between
 # encountering the recovery conflict which causes the disconnect and checking
 # the logfile for the terminated connection.
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 check_conflict_log("User was holding shared buffer pin for too long");
 reconnect_and_clear();
@@ -138,8 +135,7 @@ $expected_conflicts++;
 
 $node_primary->safe_psql($test_db,
        qq[INSERT INTO $table1 SELECT i, 0 FROM generate_series(1,20) i]);
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 # DECLARE and FETCH from cursor on the standby
 $psql_standby{stdin} .= qq[
@@ -160,8 +156,7 @@ $node_primary->safe_psql($test_db,
 $node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
 
 # Wait for attempted replay of PRUNE records
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 check_conflict_log(
        "User query might have needed to see row versions that must be 
removed");
@@ -184,8 +179,7 @@ ok(pump_until_standby(qr/^1$/m), "$sect: conflicting lock 
acquired");
 # DROP TABLE containing block which standby has in a pinned buffer
 $node_primary->safe_psql($test_db, qq[DROP TABLE $table1;]);
 
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 check_conflict_log("User was holding a relation lock for too long");
 reconnect_and_clear();
@@ -213,8 +207,7 @@ ok(pump_until_standby(qr/^6000$/m),
 # standby
 $node_primary->safe_psql($test_db, qq[DROP TABLESPACE $tablespace1;]);
 
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 check_conflict_log(
        "User was or might have been using tablespace that must be dropped");
@@ -255,8 +248,7 @@ INSERT INTO $table1(a) VALUES (170);
 SELECT txid_current();
 ]);
 
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 $psql_standby{stdin} .= qq[
     BEGIN;
@@ -282,8 +274,7 @@ SELECT 'waiting' FROM pg_locks WHERE locktype = 'relation' 
AND NOT granted;
 # VACUUM will prune away rows, causing a buffer pin conflict, while standby
 # psql is waiting on lock
 $node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 check_conflict_log("User transaction caused buffer deadlock with recovery.");
 reconnect_and_clear();
@@ -311,8 +302,7 @@ $sect = "database conflict";
 
 $node_primary->safe_psql('postgres', qq[DROP DATABASE $test_db;]);
 
-$primary_lsn = $node_primary->lsn('flush');
-$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+$node_primary->wait_for_replay_catchup($node_standby, $node_primary);
 
 check_conflict_log("User was connected to a database that must be dropped");
 
diff --git a/src/test/recovery/t/033_replay_tsp_drops.pl 
b/src/test/recovery/t/033_replay_tsp_drops.pl
index 896b282bd4..ca49e2d4dc 100644
--- a/src/test/recovery/t/033_replay_tsp_drops.pl
+++ b/src/test/recovery/t/033_replay_tsp_drops.pl
@@ -42,8 +42,7 @@ sub test_tablespace
        $node_standby->start;
 
        # Make sure the connection is made
-       $node_primary->wait_for_catchup($node_standby, 'write',
-               $node_primary->lsn('write'));
+       $node_primary->wait_for_write_catchup($node_standby, $node_primary);
 
        # Do immediate shutdown just after a sequence of CREATE DATABASE / DROP
        # DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records
@@ -65,8 +64,7 @@ sub test_tablespace
        $query =~ s/<STRATEGY>/$strategy/g;
 
        $node_primary->safe_psql('postgres', $query);
-       $node_primary->wait_for_catchup($node_standby, 'write',
-               $node_primary->lsn('write'));
+       $node_primary->wait_for_write_catchup($node_standby, $node_primary);
 
        # show "create missing directory" log message
        $node_standby->safe_psql('postgres',

Reply via email to