Le mercredi 27 octobre 2021, 04:17:28 CEST Kyotaro Horiguchi a écrit :
> +my @walfiles = glob "$slot_dir/*";
>
> This is not used.
>
Sorry, fixed in attached version.
> Each pg_receivewal run stalls for about 10 or more seconds before
> finishing, which is not great from the standpoint of recently
> increasing test run time.
> Maybe we want to advance LSN a bit, after taking $nextlsn then pass
> "-s 1" to pg_receivewal.
I incorrectly assumed it was due to the promotion time without looking into
it. In fact, you're right: the LSN was not incremented after we fetched the end
LSN, and thus we would wait for quite a while. I fixed that too.
Thank you for the review!
--
Ronan Dunklau
>From 04747199b7be896a62021e4f064b2342234427d5 Mon Sep 17 00:00:00 2001
From: Ronan Dunklau <ronan.dunk...@aiven.io>
Date: Tue, 26 Oct 2021 10:54:12 +0200
Subject: [PATCH v14] Add a test for pg_receivewal following timeline switch.
pg_receivewal is able to follow a timeline switch, but this was not
tested anywhere. This test case verifies that it works as expected both
when resuming from a replication slot and from the archive directory,
which have different methods of retrieving the timeline it left off at.
---
src/bin/pg_basebackup/t/020_pg_receivewal.pl | 86 +++++++++++++++++++-
1 file changed, 85 insertions(+), 1 deletion(-)
diff --git a/src/bin/pg_basebackup/t/020_pg_receivewal.pl b/src/bin/pg_basebackup/t/020_pg_receivewal.pl
index 092c9b6f25..28e4e9b3a4 100644
--- a/src/bin/pg_basebackup/t/020_pg_receivewal.pl
+++ b/src/bin/pg_basebackup/t/020_pg_receivewal.pl
@@ -5,7 +5,7 @@ use strict;
use warnings;
use PostgreSQL::Test::Utils;
use PostgreSQL::Test::Cluster;
-use Test::More tests => 31;
+use Test::More tests => 39;
program_help_ok('pg_receivewal');
program_version_ok('pg_receivewal');
@@ -206,3 +206,87 @@ $primary->command_ok(
"WAL streamed from the slot's restart_lsn");
ok(-e "$slot_dir/$walfile_streamed",
"WAL from the slot's restart_lsn has been archived");
+
+# Test a timeline switch.
+# This test is split in two, using the same standby: one test checks the
+# resume-from-folder case, the other the resume-from-slot one.
+
+# Set up a standby for our tests
+my $backup_name = "basebackup";
+$primary->backup($backup_name);
+my $standby = PostgreSQL::Test::Cluster->new("standby");
+$standby->init_from_backup($primary, $backup_name, has_streaming => 1);
+$standby->start;
+
+# Clean up the previous stream directories to reuse them
+unlink glob "'${stream_dir}/*'";
+unlink glob "'${slot_dir}/*'";
+
+# Create two replication slots.
+# First one is to make sure we keep the wal for the resume-from-folder case.
+my $folder_slot = "folder_slot";
+$standby->psql('',
+ "CREATE_REPLICATION_SLOT $folder_slot PHYSICAL (RESERVE_WAL)",
+ replication => 1);
+# Second one is for testing the resume-from-slot case
+my $archive_slot = "archive_slot";
+$standby->psql('',
+ "CREATE_REPLICATION_SLOT $archive_slot PHYSICAL (RESERVE_WAL)",
+ replication => 1);
+# Get a walfilename from before the promotion to make sure it is archived
+# after promotion
+my $walfile_before_promotion = $primary->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_insert_lsn())");
+# Switch wal to make sure it is not a partial file but a complete segment.
+$primary->psql('postgres',
+ 'INSERT INTO test_table VALUES (generate_series(1,100));');
+$primary->psql('postgres', 'SELECT pg_switch_wal();');
+
+# Populate the stream_dir for the resume-from-folder case.
+$nextlsn = $primary->safe_psql('postgres', 'SELECT pg_current_wal_insert_lsn();');
+chomp($nextlsn);
+$standby->run_log(
+ [ 'pg_receivewal', '-D', $stream_dir, '--verbose', '--endpos', $nextlsn,
+ '--slot', $folder_slot, '--no-sync'],
+ "Stream some wal before promoting");
+
+# Everything is set up, promote the standby to trigger a timeline switch
+$standby->psql(
+ 'postgres',
+ "SELECT pg_promote(wait_seconds => 300)");
+
+# Force a wal switch to make sure at least one full WAL is archived on the new
+# timeline, and fetch this walfilename.
+my $walfile_after_promotion = $standby->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_insert_lsn())");
+$standby->psql('postgres',
+ 'INSERT INTO test_table VALUES (generate_series(1,100));');
+$standby->psql('postgres', 'SELECT pg_switch_wal();');
+$nextlsn =
+ $standby->safe_psql('postgres', 'SELECT pg_current_wal_insert_lsn();');
+chomp($nextlsn);
+
+# Now, try to resume after the promotion, from the folder.
+$standby->command_ok(
+ [ 'pg_receivewal', '-D', $stream_dir, '--verbose', '--endpos', $nextlsn,
+ '--slot', $folder_slot, '--no-sync'],
+ "Stream some wal after promoting, resuming from the folder's position");
+
+ok(-e "$stream_dir/$walfile_before_promotion",
+ "WAL from the old timeline has been archived resuming from the folder");
+ok(-e "$stream_dir/$walfile_after_promotion",
+ "WAL from the new timeline has been archived resuming from the folder");
+ok(-e "$stream_dir/00000002.history",
+ "Timeline history file has been archived resuming from the folder");
+
+# Same thing, resuming from the slot
+$standby->command_ok(
+ [ 'pg_receivewal', '-D', $slot_dir, '--verbose', '--endpos', $nextlsn,
+ '--slot', $archive_slot, '--no-sync'],
+ "Stream some wal after promoting, resuming from the slot's position");
+ok(-e "$slot_dir/$walfile_before_promotion",
+ "WAL from the old timeline has been archived resuming from the slot");
+ok(-e "$slot_dir/$walfile_after_promotion",
+ "WAL from the new timeline has been archived resuming from the slot");
+ok(-e "$slot_dir/00000002.history",
+ "Timeline history file has been archived resuming from the slot");
--
2.33.1