On Fri, 31 Jan 2025 at 17:50, Nisha Moond <nisha.moond...@gmail.com> wrote: > > Thanks for the review! I have incorporated the above comments. The > test in patch-002 has been optimized as suggested and now completes in > less than a second. > Please find the attached v66 patch set. The base patch(v65-001) is > committed now, so I have rebased the patches.
Few comments: 1)We should set inactive_since only if the slot can be invalidated: + /* For testing timeout slot invalidation */ + if (IS_INJECTION_POINT_ATTACHED("slot-time-out-inval")) + s->inactive_since = 1; + 2) Instead of "alter system set" and reload, let's do this in $node->append_conf itself: +# Set timeout GUC so that the next checkpoint will invalidate inactive slots +$node->safe_psql( + 'postgres', qq[ + ALTER SYSTEM SET idle_replication_slot_timeout TO '1min'; +]); +$node->reload; 3) No need to trigger checkpoint twice, we can move it outside so that just a single checkpoint will invalidate both the slots: +sub trigger_slot_invalidation +{ + my ($node, $slot, $offset) = @_; + my $node_name = $node->name; + my $invalidated = 0; + + # Run a checkpoint + $node->safe_psql('postgres', "CHECKPOINT"); 4) I fel this trigger_slot_invalidation is not required after removing the checkpoint from the function, let's move the waiting for "invalidating obsolete replication slot" also to wait_for_slot_invalidation function: + # The slot's invalidation should be logged + $node->wait_for_log(qr/invalidating obsolete replication slot \"$slot\"/, + $offset); + + # Check that the invalidation reason is 'idle_timeout' + $node->poll_query_until( + 'postgres', qq[ + SELECT COUNT(slot_name) = 1 FROM pg_replication_slots + WHERE slot_name = '$slot' AND + invalidation_reason = 'idle_timeout'; + ]) 5) Can we move the subroutine to the beginning, I noticed in other places we have kept it before the tests like in 027_nosuperuser and 040_createsubscriber: +# Wait for slot to first become idle and then get invalidated +sub wait_for_slot_invalidation +{ + my ($node, $slot, $offset) = @_; + my $node_name = $node->name; + + trigger_slot_invalidation($node, $slot, $offset); + + # Check that an invalidated slot cannot be acquired + my ($result, $stdout, $stderr); + ($result, $stdout, $stderr) = $node->psql( + 'postgres', qq[ + SELECT pg_replication_slot_advance('$slot', '0/1'); + ]); 6) Since idle_replication_slot_timeout is related more closely with max_slot_wal_keep_size, let's keep it along with it. diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 079efa1baa..0ed9eb057e 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -329,6 +329,7 @@ #wal_sender_timeout = 60s # in milliseconds; 0 disables #track_commit_timestamp = off # collect timestamp of transaction commit # (change requires restart) +#idle_replication_slot_timeout = 1d # in minutes; 0 disables If you accept the comments, you can merge the changes from the attached patch. Regards, Vignesh
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index c09b90eeac..e81bce348c 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -1724,20 +1724,22 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlotInvalidationCause cause, case RS_INVAL_IDLE_TIMEOUT: Assert(now > 0); - /* For testing timeout slot invalidation */ - if (IS_INJECTION_POINT_ATTACHED("slot-time-out-inval")) - s->inactive_since = 1; - - /* - * Check if the slot needs to be invalidated due to - * idle_replication_slot_timeout GUC. - */ - if (CanInvalidateIdleSlot(s) && - TimestampDifferenceExceedsSeconds(s->inactive_since, now, - idle_replication_slot_timeout_mins * SECS_PER_MINUTE)) + if (CanInvalidateIdleSlot(s)) { - invalidation_cause = cause; - inactive_since = s->inactive_since; + /* For testing timeout slot invalidation */ + if (IS_INJECTION_POINT_ATTACHED("slot-time-out-inval")) + s->inactive_since = 1; + + /* + * Check if the slot needs to be invalidated due to + * idle_replication_slot_timeout GUC. + */ + if (TimestampDifferenceExceedsSeconds(s->inactive_since, now, + idle_replication_slot_timeout_mins * SECS_PER_MINUTE)) + { + invalidation_cause = cause; + inactive_since = s->inactive_since; + } } break; case RS_INVAL_NONE: diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 0ed9eb057e..70be3a2ce5 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -326,10 +326,10 @@ # (change requires restart) #wal_keep_size = 0 # in megabytes; 0 disables #max_slot_wal_keep_size = -1 # in megabytes; -1 disables +#idle_replication_slot_timeout = 1d # in minutes; 0 disables #wal_sender_timeout = 60s # in milliseconds; 0 disables #track_commit_timestamp = off # collect timestamp of transaction commit # (change requires restart) -#idle_replication_slot_timeout = 1d # in minutes; 0 disables # - Primary Server - diff --git a/src/test/recovery/t/044_invalidate_inactive_slots.pl b/src/test/recovery/t/044_invalidate_inactive_slots.pl index fd907e7f82..7f18881cba 100644 --- a/src/test/recovery/t/044_invalidate_inactive_slots.pl +++ b/src/test/recovery/t/044_invalidate_inactive_slots.pl @@ -13,6 +13,39 @@ if ($ENV{enable_injection_points} ne 'yes') plan skip_all => 'Injection points not supported by this build'; } +# Wait for slot to first become idle and then get invalidated +sub wait_for_slot_invalidation +{ + my ($node, $slot, $offset) = @_; + my $node_name = $node->name; + + # The slot's invalidation should be logged + $node->wait_for_log(qr/invalidating obsolete replication slot \"$slot\"/, + $offset); + + # Check that the invalidation reason is 'idle_timeout' + $node->poll_query_until( + 'postgres', qq[ + SELECT COUNT(slot_name) = 1 FROM pg_replication_slots + WHERE slot_name = '$slot' AND + invalidation_reason = 'idle_timeout'; + ]) + or die + "Timed out while waiting for invalidation reason of slot $slot to be set on node $node_name"; + + # Check that an invalidated slot cannot be acquired + my ($result, $stdout, $stderr); + ($result, $stdout, $stderr) = $node->psql( + 'postgres', qq[ + SELECT pg_replication_slot_advance('$slot', '0/1'); + ]); + ok( $stderr =~ /can no longer access replication slot "$slot"/, + "detected error upon trying to acquire invalidated slot $slot on node $node_name" + ) + or die + "could not detect error upon trying to acquire invalidated slot $slot on node $node_name"; +} + # ======================================================================== # Testcase start # @@ -27,6 +60,7 @@ $node->init(allows_streaming => 'logical'); $node->append_conf( 'postgresql.conf', qq{ checkpoint_timeout = 1h +idle_replication_slot_timeout = 1min }); $node->start; @@ -41,13 +75,6 @@ $node->psql('postgres', my $logstart = -s $node->logfile; -# Set timeout GUC so that the next checkpoint will invalidate inactive slots -$node->safe_psql( - 'postgres', qq[ - ALTER SYSTEM SET idle_replication_slot_timeout TO '1min'; -]); -$node->reload; - # Register an injection point on the primary to forcibly cause a slot timeout $node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); @@ -62,6 +89,9 @@ if (!$node->check_extension('injection_points')) $node->safe_psql('postgres', "SELECT injection_points_attach('slot-time-out-inval', 'error');"); +# Run a checkpoint which will invalidate the slots +$node->safe_psql('postgres', "CHECKPOINT"); + # Wait for slots to become inactive. Note that nobody has acquired the slot # yet, so it must get invalidated due to idle timeout. wait_for_slot_invalidation($node, 'physical_slot', $logstart); @@ -70,50 +100,4 @@ wait_for_slot_invalidation($node, 'logical_slot', $logstart); # Testcase end # ============================================================================= -# Wait for slot to first become idle and then get invalidated -sub wait_for_slot_invalidation -{ - my ($node, $slot, $offset) = @_; - my $node_name = $node->name; - - trigger_slot_invalidation($node, $slot, $offset); - - # Check that an invalidated slot cannot be acquired - my ($result, $stdout, $stderr); - ($result, $stdout, $stderr) = $node->psql( - 'postgres', qq[ - SELECT pg_replication_slot_advance('$slot', '0/1'); - ]); - ok( $stderr =~ /can no longer access replication slot "$slot"/, - "detected error upon trying to acquire invalidated slot $slot on node $node_name" - ) - or die - "could not detect error upon trying to acquire invalidated slot $slot on node $node_name"; -} - -# Trigger slot invalidation and confirm it in the server log -sub trigger_slot_invalidation -{ - my ($node, $slot, $offset) = @_; - my $node_name = $node->name; - my $invalidated = 0; - - # Run a checkpoint - $node->safe_psql('postgres', "CHECKPOINT"); - - # The slot's invalidation should be logged - $node->wait_for_log(qr/invalidating obsolete replication slot \"$slot\"/, - $offset); - - # Check that the invalidation reason is 'idle_timeout' - $node->poll_query_until( - 'postgres', qq[ - SELECT COUNT(slot_name) = 1 FROM pg_replication_slots - WHERE slot_name = '$slot' AND - invalidation_reason = 'idle_timeout'; - ]) - or die - "Timed out while waiting for invalidation reason of slot $slot to be set on node $node_name"; -} - done_testing();