On Sun, Mar 20, 2022 at 4:56 PM Melanie Plageman <melanieplage...@gmail.com> wrote: > > Addressed all of these points in > v2-0001-add-replica-cleanup-tests.patch > > also added a new test file in > v2-0002-Add-TAP-test-for-discarding-stats-after-crash.patch > testing correct behavior after a crash and when stats file is invalid >
Attached is the last of the tests confirming clean up for stats in the shared stats hashtable (these are for the subscription stats). I thought that maybe these tests could now use pg_stat_force_next_flush() instead of poll_query_until() but I wasn't sure how to ensure that the error has happened and the pending entry has been added before setting force_next_flush. I also added in tests that resetting subscription stats works as expected. - Melanie
From ffb83cc6ad2941f1d01b42b55dd0615a011d59cf Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Mon, 21 Mar 2022 14:52:55 -0400 Subject: [PATCH] add subscriber stats reset and drop tests --- src/test/subscription/t/026_stats.pl | 303 ++++++++++++++++++++------- 1 file changed, 230 insertions(+), 73 deletions(-) diff --git a/src/test/subscription/t/026_stats.pl b/src/test/subscription/t/026_stats.pl index a42ea3170e..e86bfb4fea 100644 --- a/src/test/subscription/t/026_stats.pl +++ b/src/test/subscription/t/026_stats.pl @@ -18,83 +18,240 @@ my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); $node_subscriber->init(allows_streaming => 'logical'); $node_subscriber->start; -# Initial table setup on both publisher and subscriber. On subscriber we -# create the same tables but with primary keys. Also, insert some data that -# will conflict with the data replicated from publisher later. -$node_publisher->safe_psql( - 'postgres', - qq[ -BEGIN; -CREATE TABLE test_tab1 (a int); -INSERT INTO test_tab1 VALUES (1); -COMMIT; -]); -$node_subscriber->safe_psql( - 'postgres', - qq[ -BEGIN; -CREATE TABLE test_tab1 (a int primary key); -INSERT INTO test_tab1 VALUES (1); -COMMIT; -]); - -# Setup publication. -my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; -$node_publisher->safe_psql('postgres', - "CREATE PUBLICATION tap_pub FOR TABLE test_tab1;"); + +sub create_sub_pub_w_errors +{ + my ($publisher, $subscriber, $db, $table_name) = @_; + # Initial table setup on both publisher and subscriber. On subscriber we + # create the same tables but with primary keys. Also, insert some data that + # will conflict with the data replicated from publisher later. + $publisher->safe_psql( + $db, + qq[ + BEGIN; + CREATE TABLE $table_name(a int); + INSERT INTO $table_name VALUES (1); + COMMIT; + ]); + $subscriber->safe_psql( + $db, + qq[ + BEGIN; + CREATE TABLE $table_name(a int primary key); + INSERT INTO $table_name VALUES (1); + COMMIT; + ]); + + # Set up publication. + my $pub_name = $table_name . '_pub'; + my $publisher_connstr = $publisher->connstr . qq( dbname=$db); + + $publisher->safe_psql($db, + qq(CREATE PUBLICATION $pub_name FOR TABLE $table_name)); + + # Create subscription. The tablesync for table on subscription will enter into + # infinite error loop due to violating the unique constraint. + my $sub_name = $table_name . '_sub'; + $subscriber->safe_psql($db, + qq(CREATE SUBSCRIPTION $sub_name CONNECTION '$publisher_connstr' PUBLICATION $pub_name) + ); + + $publisher->wait_for_catchup($sub_name); + + # TODO: can this be replaced with pg_stat_force_next_flush() and a test + # that sync error > 0? + # Wait for the tablesync error to be reported. + $node_subscriber->poll_query_until( + $db, + qq[ + SELECT sync_error_count > 0 + FROM pg_stat_subscription_stats + WHERE subname = '$sub_name' + ]) or die qq(Timed out while waiting for tablesync errors for subscription '$sub_name'); + + # Truncate test_tab1 so that tablesync worker can continue. + $subscriber->safe_psql($db, qq(TRUNCATE $table_name)); + + # Wait for initial tablesync to finish. + $subscriber->poll_query_until( + $db, + qq[ + SELECT count(1) = 1 FROM pg_subscription_rel + WHERE srrelid = '$table_name'::regclass AND srsubstate in ('r', 's') + ]) or die qq(Timed out while waiting for subscriber to synchronize data for table '$table_name'.); + + # Check test table on the subscriber has one row. + my $result = $subscriber->safe_psql($db, qq(SELECT a FROM $table_name)); + is($result, qq(1), qq(Check that table '$table_name' now has 1 row.)); + + # Insert data to test table on the publisher, raising an error on the + # subscriber due to violation of the unique constraint on test table. + $publisher->safe_psql($db, qq(INSERT INTO $table_name VALUES (1))); + + # TODO: can this be replaced with a pg_stat_force_next_flush() and a test + # that apply error > 0? + # Wait for the apply error to be reported. + $subscriber->poll_query_until( + $db, + qq[ + SELECT apply_error_count > 0 + FROM pg_stat_subscription_stats + WHERE subname = '$sub_name' + ]) or die qq(Timed out while waiting for apply error for subscription '$sub_name'); + + # Truncate test table so that apply worker can continue. + $subscriber->safe_psql($db, qq(TRUNCATE $table_name)); + + return ($pub_name, $sub_name); +} + +my $db = 'postgres'; # There shouldn't be any subscription errors before starting logical replication. -my $result = $node_subscriber->safe_psql('postgres', - "SELECT count(1) FROM pg_stat_subscription_stats"); -is($result, qq(0), 'check no subscription error'); - -# Create subscription. The tablesync for test_tab1 on tap_sub will enter into -# infinite error loop due to violating the unique constraint. -$node_subscriber->safe_psql('postgres', - "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr' PUBLICATION tap_pub;" +my $result = $node_subscriber->safe_psql($db, + qq(SELECT count(1) FROM pg_stat_subscription_stats)); +is($result, qq(0), 'Check that there are no subscription errors before starting logical replication.'); + +# Create the publication and subscription with sync and apply errors +my $table1_name = 'test_tab1'; +my ($pub1_name, $sub1_name) = create_sub_pub_w_errors($node_publisher, $node_subscriber, $db, $table1_name); + +# Apply and Sync errors are > 0 and reset timestamp is NULL +is($node_subscriber->safe_psql( + $db, + qq(SELECT apply_error_count > 0, + sync_error_count > 0, + stats_reset IS NULL + FROM pg_stat_subscription_stats + WHERE subname = '$sub1_name') +), qq(t|t|t), +qq(Check that apply errors and sync errors are both > 0 and stats_reset is NULL for subscription '$sub1_name'.)); + +# Reset a single subscription +$node_subscriber->safe_psql( + $db, + qq(SELECT pg_stat_reset_subscription_stats((SELECT subid FROM pg_stat_subscription_stats WHERE subname = '$sub1_name'))) +); + +# Apply and Sync errors are 0 and stats reset is not NULL +is($node_subscriber->safe_psql( + $db, + qq(SELECT apply_error_count = 0, + sync_error_count = 0, + stats_reset IS NOT NULL + FROM pg_stat_subscription_stats + WHERE subname = '$sub1_name') +), qq(t|t|t), +qq(Confirm that apply errors and sync errors are both 0 and stats_reset is not NULL after reset for subscription '$sub1_name'.)); + +# Get reset timestamp +my $reset_time1 = $node_subscriber->safe_psql( + $db, + qq(SELECT stats_reset FROM pg_stat_subscription_stats WHERE subname = '$sub1_name') +); + +# Reset single sub again +$node_subscriber->safe_psql( + $db, + qq(SELECT pg_stat_reset_subscription_stats((SELECT subid FROM + pg_stat_subscription_stats WHERE subname = '$sub1_name'))) +); + +# check reset timestamp is newer after reset +is($node_subscriber->safe_psql( + $db, + qq(SELECT stats_reset > '$reset_time1'::timestamptz FROM + pg_stat_subscription_stats WHERE subname = '$sub1_name') +), qq(t), qq(Check reset timestamp for '$sub1_name' is newer after second reset.)); + +# Make second subscription and publication +my $table2_name = 'test_tab2'; +my ($pub2_name, $sub2_name) = create_sub_pub_w_errors($node_publisher, $node_subscriber, $db, $table2_name); + +# Apply and Sync errors are > 0 and reset timestamp is NULL +is($node_subscriber->safe_psql( + $db, + qq(SELECT apply_error_count > 0, + sync_error_count > 0, + stats_reset IS NULL + FROM pg_stat_subscription_stats + WHERE subname = '$sub2_name') +), qq(t|t|t), qq(Confirm that apply errors and sync errors are both > 0 and stats_reset is NULL for sub '$sub2_name'.)); + +# Reset all subscriptions +$node_subscriber->safe_psql( + $db, + qq(SELECT pg_stat_reset_subscription_stats(NULL)) +); + +# Apply and Sync errors are 0 and stats reset is not NULL +is($node_subscriber->safe_psql( + $db, + qq(SELECT apply_error_count = 0, + sync_error_count = 0, + stats_reset IS NOT NULL + FROM pg_stat_subscription_stats + WHERE subname = '$sub1_name') +), qq(t|t|t), qq(Confirm that apply errors and sync errors are both 0 and stats_reset is not NULL for sub '$sub1_name' after reset.)); + +is($node_subscriber->safe_psql( + $db, + qq(SELECT apply_error_count = 0, + sync_error_count = 0, + stats_reset IS NOT NULL + FROM pg_stat_subscription_stats + WHERE subname = '$sub2_name') +), qq(t|t|t), qq(Confirm that apply errors and sync errors are both 0 and stats_reset is not NULL for sub '$sub2_name' after reset.)); + +# Get reset timestamp 1 +$reset_time1 = $node_subscriber->safe_psql( + $db, + qq(SELECT stats_reset FROM pg_stat_subscription_stats WHERE subname = '$sub1_name') +); + +# Get reset timestamp 2 +my $reset_time2 = $node_subscriber->safe_psql( + $db, + qq(SELECT stats_reset FROM pg_stat_subscription_stats WHERE subname = '$sub2_name') +); + +# Reset all subscriptions +$node_subscriber->safe_psql( + $db, + qq(SELECT pg_stat_reset_subscription_stats(NULL)) +); + +# check reset timestamp for sub1 is newer after reset +is($node_subscriber->safe_psql( + $db, + qq(SELECT stats_reset > '$reset_time1'::timestamptz FROM + pg_stat_subscription_stats WHERE subname = '$sub1_name') +), qq(t), qq(Confirm that reset timestamp for '$sub1_name' is newer after second reset.)); + +# check reset timestamp for sub2 is newer after reset +is($node_subscriber->safe_psql( + $db, + qq(SELECT stats_reset > '$reset_time2'::timestamptz FROM + pg_stat_subscription_stats WHERE subname = '$sub2_name') +), qq(t), qq(Confirm that reset timestamp for '$sub2_name' is newer after second reset.)); + +# Get subscription 1 oid +my $sub1_oid = $node_subscriber->safe_psql($db, + qq(SELECT oid FROM pg_subscription WHERE subname = '$sub1_name') +); + +# Drop subscription 1 +$node_subscriber->safe_psql( + $db, + qq(DROP SUBSCRIPTION $sub1_name) ); -$node_publisher->wait_for_catchup('tap_sub'); - -# Wait for the tablesync error to be reported. -$node_subscriber->poll_query_until( - 'postgres', - qq[ -SELECT sync_error_count > 0 -FROM pg_stat_subscription_stats -WHERE subname = 'tap_sub' -]) or die "Timed out while waiting for tablesync error"; - -# Truncate test_tab1 so that tablesync worker can continue. -$node_subscriber->safe_psql('postgres', "TRUNCATE test_tab1;"); - -# Wait for initial tablesync for test_tab1 to finish. -$node_subscriber->poll_query_until( - 'postgres', - qq[ -SELECT count(1) = 1 FROM pg_subscription_rel -WHERE srrelid = 'test_tab1'::regclass AND srsubstate in ('r', 's') -]) or die "Timed out while waiting for subscriber to synchronize data"; - -# Check test_tab1 on the subscriber has one row. -$result = $node_subscriber->safe_psql('postgres', "SELECT a FROM test_tab1"); -is($result, qq(1), 'check the table has now row'); - -# Insert data to test_tab1 on the publisher, raising an error on the subscriber -# due to violation of the unique constraint on test_tab1. -$node_publisher->safe_psql('postgres', "INSERT INTO test_tab1 VALUES (1)"); - -# Wait for the apply error to be reported. -$node_subscriber->poll_query_until( - 'postgres', - qq[ -SELECT apply_error_count > 0 -FROM pg_stat_subscription_stats -WHERE subname = 'tap_sub' -]) or die "Timed out while waiting for apply error"; - -# Truncate test_tab1 so that apply worker can continue. -$node_subscriber->safe_psql('postgres', "TRUNCATE test_tab1;"); +# Subscription stats for sub1 should be gone +is($node_subscriber->safe_psql( + $db, + qq(SELECT pg_stat_stats_exist($sub1_oid, $sub1_oid, 'subscription')) + ), qq(f), qq(Subscription stats for subscription '$sub1_name' should be removed.)); + $node_subscriber->stop('fast'); $node_publisher->stop('fast'); -- 2.30.2