On Fri, Jun 11, 2021 at 10:19 AM Andres Freund <and...@anarazel.de> wrote:
>
> Hi,
>
> On 2021-06-10 16:42:01 +0300, Anastasia Lubennikova wrote:
> > Cool. Thank you for working on that!
> > Could you please share a WIP patch for the $subj? I'd be happy to help with
> > it.
>
> I've attached the current WIP state, which hasn't evolved much since
> this message... I put the test in 
> src/backend/access/heap/t/001_emergency_vacuum.pl
> but I'm not sure that's the best place. But I didn't think
> src/test/recovery is great either.
>

Thank you for sharing the WIP patch.

Regarding point (1) you mentioned (StartupSUBTRANS() takes a long time
for zeroing out all pages), how about using single-user mode instead
of preparing the transaction? That is, after pg_resetwal we check the
ages of datfrozenxid by executing a query in single-user mode. That
way, we don’t need to worry about autovacuum concurrently running
while checking the ages of frozenxids. I’ve attached a PoC patch that
does the scenario like:

1. start cluster with autovacuum=off and create tables with a few data
and make garbage on them
2. stop cluster and do pg_resetwal
3. start cluster in single-user mode
4. check age(datfrozenxid)
5. stop cluster
6. start cluster and wait for autovacuums to increase template0,
template1, and postgres datfrozenxids

I put new tests in src/test/module/heap since we already have tests
for brin in src/test/module/brin.

I think that tap test facility to run queries in single-user mode will
also be helpful for testing a new vacuum option/command that is
intended to use in emergency cases and proposed here[1].

Regards,

[1]  
https://www.postgresql.org/message-id/flat/20220128012842.GZ23027%40telsasoft.com#b76c13554f90d1c8bb5532d6f3e5cbf8


--
Masahiko Sawada
EDB:  https://www.enterprisedb.com/
diff --git a/src/test/modules/heap/.gitignore b/src/test/modules/heap/.gitignore
new file mode 100644
index 0000000000..716e17f5a2
--- /dev/null
+++ b/src/test/modules/heap/.gitignore
@@ -0,0 +1,2 @@
+# Generated subdirectories
+/tmp_check/
diff --git a/src/test/modules/heap/Makefile b/src/test/modules/heap/Makefile
new file mode 100644
index 0000000000..d3c08a04b7
--- /dev/null
+++ b/src/test/modules/heap/Makefile
@@ -0,0 +1,14 @@
+# src/test/modules/heap/Makefile
+
+TAP_TESTS = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/heap
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/heap/t/001_emergency_vacuum.pl b/src/test/modules/heap/t/001_emergency_vacuum.pl
new file mode 100644
index 0000000000..3229f99921
--- /dev/null
+++ b/src/test/modules/heap/t/001_emergency_vacuum.pl
@@ -0,0 +1,131 @@
+# Copyright (c) 2022, PostgreSQL Global Development Group
+
+# Test for wraparound emergency situation
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More tests => 8;
+use IPC::Run qw(pump finish timer);
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf('postgresql.conf', qq/
+autovacuum = off # run autovacuum only when to anti wraparound
+max_prepared_transactions=10
+autovacuum_naptime = 1s
+# So it's easier to verify the order of operations
+autovacuum_max_workers=1
+autovacuum_vacuum_cost_delay=0
+log_autovacuum_min_duration=0
+/);
+$node_primary->start;
+
+#
+# Create tables for a few different test scenarios
+#
+
+$node_primary->safe_psql('postgres', qq/
+CREATE TABLE large(id serial primary key, data text, filler text default repeat(random()::text, 10));
+INSERT INTO large(data) SELECT generate_series(1,30000);
+
+CREATE TABLE large_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10));
+INSERT INTO large_trunc(data) SELECT generate_series(1,30000);
+
+CREATE TABLE small(id serial primary key, data text, filler text default repeat(random()::text, 10));
+INSERT INTO small(data) SELECT generate_series(1,15000);
+
+CREATE TABLE small_trunc(id serial primary key, data text, filler text default repeat(random()::text, 10));
+INSERT INTO small_trunc(data) SELECT generate_series(1,15000);
+
+CREATE TABLE autovacuum_disabled(id serial primary key, data text) WITH (autovacuum_enabled=false);
+INSERT INTO autovacuum_disabled(data) SELECT generate_series(1,1000);
+/);
+
+# Delete a few rows to ensure that vacuum has work to do.
+$node_primary->safe_psql('postgres', qq/
+DELETE FROM large WHERE id % 2 = 0;
+DELETE FROM large_trunc WHERE id > 10000;
+DELETE FROM small WHERE id % 2 = 0;
+DELETE FROM small_trunc WHERE id > 1000;
+DELETE FROM autovacuum_disabled WHERE id % 2 = 0;
+/);
+
+
+# Stop the server and temporarily disable log_statement while running in single-user mode
+$node_primary->stop;
+$node_primary->append_conf('postgresql.conf', qq/
+log_statement = 'none'
+/);
+
+# Need to reset to a clog page boundary, otherwise we'll get errors
+# about the file not existing. With default compilation settings
+# CLOG_XACTS_PER_PAGE is 32768. The value below is 32768 *
+# (2000000000/32768 + 1), with 2000000000 being the max value for
+# autovacuum_freeze_max_age.
+
+command_like([ 'pg_resetwal', '-x2000027648', $node_primary->data_dir ],
+	     qr/Write-ahead log reset/, 'pg_resetwal -x to');
+
+my $in  = '';
+my $out = '';
+my $timer = timer(5);
+
+# Start the server in single-user mode.  That allows us to test interactions
+# without autovacuums.
+my $h = $node_primary->start_single_user_mode('postgres', \$in, \$out, $timer);
+
+$out = "";
+# Must be a single line with a new line at the end.
+$in .=
+    "SELECT datname, " .
+    "age(datfrozenxid) > current_setting('autovacuum_freeze_max_age')::int as old ".
+    "FROM pg_database ORDER BY 1;\n";
+
+# Pump until we got the result.
+pump $h until ($out != "" || $timer->is_expired);
+
+# Check all database are old enough.
+like($out, qr/1: datname = "postgres"[^\r\n]+\r\n\t 2: old = "t"/,
+     "postgres database is old enough");
+like($out, qr/1: datname = "template0"[^\r\n]+\r\n\t 2: old = "t"/,
+     "template0 database is old enough");
+like($out, qr/1: datname = "template1"[^\r\n]+\r\n\t 2: old = "t"/,
+     "template1 database is old enough");
+
+# Terminate single user mode.
+$in .= "\cD";
+finish $h or die "postgres --single returned $?";
+
+# Revert back the logging setting.
+$node_primary->append_conf('postgresql.conf', qq/
+log_statement = 'all'
+/);
+
+# Now test autovacuum behaviour.
+$node_primary->start;
+
+ok($node_primary->poll_query_until('postgres', qq/
+    SELECT NOT EXISTS (
+        SELECT *
+        FROM pg_database
+        WHERE age(datfrozenxid) > current_setting('autovacuum_freeze_max_age')::int)
+/),
+   "xid horizon increased");
+
+my $ret = $node_primary->safe_psql('postgres', qq/
+SELECT relname, age(relfrozenxid) > current_setting('autovacuum_freeze_max_age')::int
+FROM pg_class
+WHERE oid = ANY(ARRAY['large'::regclass, 'large_trunc', 'small', 'small_trunc', 'autovacuum_disabled'])
+ORDER BY 1
+/);
+is($ret, "autovacuum_disabled|f
+large|f
+large_trunc|f
+small|f
+small_trunc|f", "all tables are vacuumed");
+
+$node_primary->stop;
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 265f3ae657..2d35978bac 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -858,6 +858,40 @@ sub start
 	return 1;
 }
 
+sub start_single_user_mode
+{
+    my ($self, $dbname, $stdin, $stdout, $timer) = @_;
+    my $name = $self->name;
+
+    BAIL_OUT("node \"$name\" is already running") if defined $self->{_pid};
+
+    print("### Starting node \"$name\" in single-user mode\n");
+
+    local %ENV = $self->_get_env();
+
+    my @postgres_params = (
+	$self->installed_command('postgres'),
+	'--single', '-D', $self->data_dir, 'postgres');
+
+    # Ensure there is no data waiting to be sent:
+    $$stdin = "" if ref($stdin);
+    # IPC::Run would otherwise append to existing contents:
+    $$stdout = "" if ref($stdout);
+
+    my $harness = IPC::Run::start \@postgres_params,
+	'<pty<', $stdin, '>pty>', $stdout, $timer;
+
+    # Pump until we see the startup banner.  This ensures that callers won't
+    # write write anything to the ptr before it's ready, avoiding an
+    # implementation issue in IPC::RUN.
+    pump $harness
+	until $$stdout =~ /PostgreSQL stand-alone backend/ || $timer->is_expired;
+
+    die "postgres --single startup timed out" if $timer->is_expired;
+
+    return $harness;
+}
+
 =pod
 
 =item $node->kill9()
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index a310bcb28c..f94c5ea8cb 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -50,7 +50,8 @@ my @contrib_excludes = (
 	'sepgsql',
 	'brin',             'test_extensions',
 	'test_misc',        'test_pg_dump',
-	'snapshot_too_old', 'unsafe_tests');
+	'snapshot_too_old', 'unsafe_tests',
+	'heap');
 
 # Set of variables for frontend modules
 my $frontend_defines = { 'initdb' => 'FRONTEND' };

Reply via email to