Hi On 2019-Jan-30, Konstantin Knizhnik wrote:
> One of our customers was faced with the following problem: he has
> set up physical primary-slave replication but for some reason
> specified very large (~12 hours) recovery_min_apply_delay.

We also came across this exact same problem some time ago. It's pretty nasty. I wrote a quick TAP reproducer, attached (needed a quick patch for PostgresNode itself too.)

I tried several failed strategies:

1. setting lastSourceFailed just before sleeping for apply delay, with the idea that for the next fetch we would try streaming. But this doesn't work because WaitForWalToBecomeAvailable is not executed.

2. split WaitForWalToBecomeAvailable in two pieces, so that we can call the first half in the restore loop. But this causes 1s of wait between segments (error recovery) and we never actually catch up.

What I thought back then was the *real* solution, but never got around to implementing, is the idea you describe: starting a walreceiver at an earlier point.

> I wonder if it can be considered as an acceptable solution of the problem or
> there can be some better approach?

I didn't find one.

-- 
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 5d16db9a8308692f66b2836432fe84fbbec3e81f Mon Sep 17 00:00:00 2001 From: Alvaro Herrera <alvhe...@alvh.no-ip.org> Date: Fri, 17 Aug 2018 14:20:47 -0300 Subject: [PATCH] Support pg_basebackup -S in PostgresNode->backup() --- src/test/perl/PostgresNode.pm | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm index d9aeb277d9..2442251683 100644 --- a/src/test/perl/PostgresNode.pm +++ b/src/test/perl/PostgresNode.pm @@ -488,6 +488,9 @@ Create a hot backup with B<pg_basebackup> in subdirectory B<backup_name> of B<< $node->backup_dir >>, including the WAL. WAL files fetched at the end of the backup, not streamed. +The keyword parameter replication_slot => 'myslot' can be used for the B<-S> +argument to B<pg_basebackup>. + You'll have to configure a suitable B<max_wal_senders> on the target server since it isn't done by default. @@ -495,14 +498,17 @@ target server since it isn't done by default. sub backup { - my ($self, $backup_name) = @_; + my ($self, $backup_name, %params) = @_; my $backup_path = $self->backup_dir . '/' . $backup_name; my $port = $self->port; my $name = $self->name; + my @cmd = ("pg_basebackup", "-D", $backup_path, "-p", $port, "--no-sync"); + push @cmd, '-S', $params{replication_slot} + if defined $params{replication_slot}; + print "# Taking pg_basebackup $backup_name from node \"$name\"\n"; - TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-p', $port, - '--no-sync'); + TestLib::system_or_bail(@cmd); print "# Backup finished\n"; } -- 2.11.0
# Test streaming with replication delay
#
# Reproducer: a standby ("golf") is created from a base backup of the primary
# ("foxtrot") with WAL archive restore, streaming through a physical
# replication slot, and recovery_min_apply_delay configured.  The primary
# generates WAL for a while before the standby is started; the test then
# waits for a walsender to show up in pg_stat_replication on the primary.
use strict;
use warnings;

use PostgresNode;
use TestLib;
use IPC::Run;
use Test::More;

# Upper bound on how long we wait for the standby to start streaming.  The
# value is arbitrary; without a bound the test would hang forever in exactly
# the failure case it is meant to demonstrate.
my $max_wait_secs      = 600;
my $poll_interval_secs = 10;

# Log the current WAL insert position and the replication slot's restart_lsn
# of the given node, for diagnosing how far the slot lags behind.
sub note_wal_positions
{
	my ($node) = @_;

	note("insert lsn: "
		  . $node->safe_psql('postgres', 'select pg_current_wal_insert_lsn()'));
	note("repl slot restart: "
		  . $node->safe_psql('postgres',
			'select restart_lsn from pg_replication_slots'));
	return;
}

my $golf    = get_new_node('golf');
my $foxtrot = get_new_node('foxtrot');

# Set up the primary with streaming and archiving, plus the slot the standby
# will use.
$foxtrot->init(allows_streaming => 1, has_archiving => 1);
$foxtrot->append_conf('postgresql.conf', 'log_connections=on');
$foxtrot->start();
$foxtrot->safe_psql('postgres',
	"select pg_create_physical_replication_slot('golf')");
$foxtrot->backup('backup', replication_slot => 'golf');

# Set up (but do not yet start) the delayed standby.
$golf->init_from_backup(
	$foxtrot, 'backup',
	has_streaming => 1,
	has_restoring => 1);
$golf->append_conf('recovery.conf', 'recovery_min_apply_delay = 1min');
$golf->append_conf('recovery.conf', "primary_slot_name = 'golf'");

# Initialize pgbench tables at scale 10.  Nothing below makes sense if this
# fails, so bail out rather than continuing with an empty database.
system("pgbench", "-is10", "-p", $foxtrot->port) == 0
  or BAIL_OUT("pgbench initialization failed (status $?)");

note_wal_positions($foxtrot);

my $slots = $foxtrot->safe_psql('postgres',
	'select slot_name,slot_type from pg_replication_slots');
is($slots, "golf|physical", "replication slot looks good");

# Generate rate-limited write traffic in the background so that WAL keeps
# accumulating on the primary.
# NOTE(review): the original comment here read "80000 transactions should be
# enough to fill just over 3 segments", which does not obviously match the
# -R100/-T10000 arguments and the 62s sleep below -- confirm intended volume.
my $h = IPC::Run::start(
	[
		'pgbench', '-R100', '-P1', '-T10000',
		$foxtrot->connstr('postgres')
	]);

# 62s is just past the 1min apply delay configured above -- presumably so
# that the accumulated WAL is already older than the delay when the standby
# starts (TODO confirm).
sleep 62;

note_wal_positions($foxtrot);

$golf->start;

# Poll the primary until a walsender appears or we run out of time.
my $deadline = time() + $max_wait_secs;
my $stat     = '';
while (1)
{
	$stat = $foxtrot->safe_psql('postgres',
		'select * from pg_stat_replication');
	my $slot_state = $foxtrot->safe_psql('postgres',
		'select * from pg_replication_slots');
	note("stat: $stat repl: $slot_state");

	last if $stat ne '' or time() >= $deadline;
	sleep($poll_interval_secs);
}

# pgbench was started with a 10000s duration; stop it explicitly so it does
# not outlive the test.
$h->kill_kill;

isnt($stat, '',
	"walsender visible in pg_stat_replication within ${max_wait_secs}s");

done_testing();