On Wed, Jun 10, 2020 at 08:57:17PM +0300, Alexey Kondratov wrote: > New test reproduces this issue well. Left it running for a couple of hours > in repeat and it seems to be stable.
Thanks for testing. I have been thinking about the minimum xmin and LSN computations on advancing, and actually I have switched the recomputation to be called at the end of pg_replication_slot_advance(). This may be a waste if no advancing is done, but it could also be an advantage to enforce a recalculation of the thresholds for each function call. And that's more consistent with the slot copy, drop and creation. > we can safely use $current_lsn used for pg_replication_slot_advance(), since > restart_lsn is set as is there. It may make the test a bit simpler as well. We could do that. However, I find the direct comparison of pg_replication_slots.restart_lsn before and after the restart cleaner, so I have kept it. -- Michael
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 1b929a603e..bfa4c874d5 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -621,6 +621,13 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS) values[0] = NameGetDatum(&MyReplicationSlot->data.name); nulls[0] = false; + /* + * Recompute the minimum LSN and xmin across all slots to adjust with + * the advancing potentially done. + */ + ReplicationSlotsComputeRequiredXmin(false); + ReplicationSlotsComputeRequiredLSN(); + ReplicationSlotRelease(); /* Return the reached position. */ diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl index 0c316c1808..778f11b28b 100644 --- a/src/test/recovery/t/001_stream_rep.pl +++ b/src/test/recovery/t/001_stream_rep.pl @@ -3,7 +3,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 35; +use Test::More tests => 36; # Initialize master node my $node_master = get_new_node('master'); @@ -364,15 +364,26 @@ my $is_replayed = $node_standby_2->safe_psql('postgres', qq[SELECT 1 FROM replayed WHERE val = $newval]); is($is_replayed, qq(1), "standby_2 didn't replay master value $newval"); +# Drop any existing slots on the primary, for the follow-up tests. +$node_master->safe_psql('postgres', + "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;"); + # Test physical slot advancing and its durability. Create a new slot on # the primary, not used by any of the standbys. This reserves WAL at creation. my $phys_slot = 'phys_slot'; $node_master->safe_psql('postgres', "SELECT pg_create_physical_replication_slot('$phys_slot', true);"); +# Generate some WAL, and switch to a new segment, used to check that +# the previous segment is correctly getting recycled as the slot advancing +# would recompute the minimum LSN calculated across all slots. 
+my $segment_removed = $node_master->safe_psql('postgres', + 'SELECT pg_walfile_name(pg_current_wal_lsn())'); +chomp($segment_removed); $node_master->psql( 'postgres', " CREATE TABLE tab_phys_slot (a int); - INSERT INTO tab_phys_slot VALUES (generate_series(1,10));"); + INSERT INTO tab_phys_slot VALUES (generate_series(1,10)); + SELECT pg_switch_wal();"); my $current_lsn = $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); chomp($current_lsn); @@ -392,3 +403,9 @@ my $phys_restart_lsn_post = $node_master->safe_psql('postgres', chomp($phys_restart_lsn_post); ok( ($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0, "physical slot advance persists across restarts"); + +# Check if the previous segment gets correctly recycled after the +# server stopped cleanly, causing a shutdown checkpoint to be generated. +my $master_data = $node_master->data_dir; +ok(!-f "$master_data/pg_wal/$segment_removed", + "WAL segment $segment_removed recycled after physical slot advancing");
signature.asc
Description: PGP signature