I looked at this a little while and was bothered by the perl changes; it
seems out of place to have RecursiveCopy be thinking about tablespaces,
which is way out of its league.  So I rewrote that to use a callback:
the PostgresNode code passes a callback that's in charge of handling the
case of a symlink.  Things look much more in place with that.  I didn't
verify that all places that should use this are filled.

In 0002 I found adding a new function unnecessary: we can keep backwards
compat by checking 'ref' of the third argument.  With that we don't have
to add a new function.  (POD changes pending.)

I haven't reviewed 0003.

v8 of all these patches attached.

"git am" told me your 0001 was in unrecognized format.  It applied fine
with "patch".  I suggest that if you're going to submit a series with
commit messages and all, please use "git format-patch" with the same
"-v" argument (9 in this case) for all patches.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From a81e747f0bfa90af8021e2399e196e177a93f62c Mon Sep 17 00:00:00 2001
From: Asim R P <aprav...@pivotal.io>
Date: Fri, 20 Sep 2019 17:31:25 +0530
Subject: [PATCH v8 1/3] Support node initialization from backup with
 tablespaces

User-defined tablespaces appear as symlinks in the backup.  This
commit tweaks the recursive copy subroutine to allow for symlinks specific
to tablespaces.

Authored by Kyotaro
---
 src/test/perl/PostgresNode.pm  | 29 +++++++++++++++++++++++-
 src/test/perl/RecursiveCopy.pm | 40 ++++++++++++++++++++++++++++------
 2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 2e0cf4a2f3..3cae483ddb 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -593,6 +593,32 @@ sub backup_fs_cold
 	return;
 }
 
+sub _srcsymlink
+{
+	my ($srcpath, $destpath) = @_;
+
+	croak "Cannot operate on symlink \"$srcpath\""
+		if ($srcpath !~ qr{/(pg_tblspc/[0-9]+)$});
+
+	# We have mapped tablespaces. Copy them individually
+	my $tmpdir = TestLib::tempdir;
+	my $dstrealdir = TestLib::perl2host($tmpdir);
+	my $srcrealdir = readlink($srcpath);
+
+	opendir(my $dh, $srcrealdir);
+	while (readdir $dh)
+	{
+		next if (/^\.\.?$/);
+		my $spath = "$srcrealdir/$_";
+		my $dpath = "$dstrealdir/$_";
+		RecursiveCopy::copypath($spath, $dpath);
+	}
+	closedir $dh;
+
+	symlink $dstrealdir, $destpath;
+
+	return 1;
+}
 
 # Common sub of backup_fs_hot and backup_fs_cold
 sub _backup_fs
@@ -680,7 +706,8 @@ sub init_from_backup
 
 	my $data_path = $self->data_dir;
 	rmdir($data_path);
-	RecursiveCopy::copypath($backup_path, $data_path);
+	RecursiveCopy::copypath($backup_path, $data_path,
+							srcsymlinkfn => \&_srcsymlink);
 	chmod(0700, $data_path);
 
 	# Base configuration for this node
diff --git a/src/test/perl/RecursiveCopy.pm b/src/test/perl/RecursiveCopy.pm
index baf5d0ac63..715edcdedd 100644
--- a/src/test/perl/RecursiveCopy.pm
+++ b/src/test/perl/RecursiveCopy.pm
@@ -66,6 +66,7 @@ sub copypath
 {
 	my ($base_src_dir, $base_dest_dir, %params) = @_;
 	my $filterfn;
+	my $srcsymlinkfn;
 
 	if (defined $params{filterfn})
 	{
@@ -80,31 +81,55 @@ sub copypath
 		$filterfn = sub { return 1; };
 	}
 
+	if (defined $params{srcsymlinkfn})
+	{
+		croak "if specified, srcsymlinkfn must be a subroutine reference"
+			unless defined(ref $params{srcsymlinkfn})
+			and (ref $params{srcsymlinkfn} eq 'CODE');
+
+		$srcsymlinkfn = $params{srcsymlinkfn};
+	}
+	else
+	{
+		$srcsymlinkfn = undef;
+	}
+
 	# Complain if original path is bogus, because _copypath_recurse won't.
 	croak "\"$base_src_dir\" does not exist" if !-e $base_src_dir;
 
 	# Start recursive copy from current directory
-	return _copypath_recurse($base_src_dir, $base_dest_dir, "", $filterfn);
+	return _copypath_recurse($base_src_dir, $base_dest_dir, "", $filterfn, $srcsymlinkfn);
 }
 
 # Recursive private guts of copypath
 sub _copypath_recurse
 {
-	my ($base_src_dir, $base_dest_dir, $curr_path, $filterfn) = @_;
+	my ($base_src_dir, $base_dest_dir, $curr_path, $filterfn,
+		$srcsymlinkfn) = @_;
 	my $srcpath  = "$base_src_dir/$curr_path";
 	my $destpath = "$base_dest_dir/$curr_path";
 
 	# invoke the filter and skip all further operation if it returns false
 	return 1 unless &$filterfn($curr_path);
 
-	# Check for symlink -- needed only on source dir
-	# (note: this will fall through quietly if file is already gone)
-	croak "Cannot operate on symlink \"$srcpath\"" if -l $srcpath;
-
 	# Abort if destination path already exists.  Should we allow directories
 	# to exist already?
 	croak "Destination path \"$destpath\" already exists" if -e $destpath;
 
+	# Check for symlink -- needed only on source dir
+	# If caller provided us with a callback, call it; otherwise we're out.
+	if (-l $srcpath)
+	{
+		if (defined $srcsymlinkfn)
+		{
+			return &$srcsymlinkfn($srcpath, $destpath);
+		}
+		else
+		{
+			croak "Cannot operate on symlink \"$srcpath\"";
+		}
+	}
+
 	# If this source path is a file, simply copy it to destination with the
 	# same name and we're done.
 	if (-f $srcpath)
@@ -137,7 +162,8 @@ sub _copypath_recurse
 		{
 			next if ($entry eq '.' or $entry eq '..');
 			_copypath_recurse($base_src_dir, $base_dest_dir,
-				$curr_path eq '' ? $entry : "$curr_path/$entry", $filterfn)
+				$curr_path eq '' ? $entry : "$curr_path/$entry", $filterfn,
+				$srcsymlinkfn)
 			  or die "copypath $srcpath/$entry -> $destpath/$entry failed";
 		}
 
-- 
2.20.1

>From 32e2c106aee30202f5731a163a6e2f1a88a6d06b Mon Sep 17 00:00:00 2001
From: Asim R P <aprav...@pivotal.io>
Date: Fri, 20 Sep 2019 17:34:19 +0530
Subject: [PATCH v8 2/3] Tests to replay create database operation on standby

The tests demonstrate that standby fails to replay a create database
WAL record during crash recovery, if one or more of underlying
directories are missing from the file system.  This can happen if a
drop tablespace or drop database WAL record has been replayed in
archive recovery, before a crash.  And then the create database record
happens to be replayed again during crash recovery.  The failures
indicate bugs that need to be fixed.

The first test, TEST 4, performs several DDL operations resulting in a
database directory being removed, along with a few create database
operations.  It expects crash recovery to succeed because for each
missing directory encountered during create database replay, a matching
drop tablespace or drop database WAL record is found later.

The second test, TEST 5, validates that a standby rightfully aborts replay
during archive recovery, if a missing directory is encountered when
replaying create database WAL record.

These tests have been proposed and implemented in various ways by
Alexandra, Anastasia, Kyotaro, Paul and me.
---
 src/test/perl/PostgresNode.pm             |  34 ++++-
 src/test/recovery/t/011_crash_recovery.pl | 152 +++++++++++++++++++++-
 2 files changed, 178 insertions(+), 8 deletions(-)

diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 3cae483ddb..e6e7ea505d 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -546,13 +546,22 @@ target server since it isn't done by default.
 
 sub backup
 {
-	my ($self, $backup_name) = @_;
+	my ($self, $backup_name, %params) = @_;
 	my $backup_path = $self->backup_dir . '/' . $backup_name;
 	my $name        = $self->name;
+	my @rest = ();
+
+	if (defined $params{tablespace_mappings})
+	{
+		my @ts_mappings = split(/,/, $params{tablespace_mappings});
+		foreach my $elem (@ts_mappings) {
+			push(@rest, '--tablespace-mapping='.$elem);
+		}
+	}
 
 	print "# Taking pg_basebackup $backup_name from node \"$name\"\n";
 	TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-h',
-		$self->host, '-p', $self->port, '--no-sync');
+		$self->host, '-p', $self->port, '--no-sync', @rest);
 	print "# Backup finished\n";
 	return;
 }
@@ -1640,13 +1649,24 @@ Returns 1 if successful, 0 if timed out.
 
 sub poll_query_until
 {
-	my ($self, $dbname, $query, $expected) = @_;
+	my ($self, $dbname, $query, $params) = @_;
+	my $expected;
 
-	$expected = 't' unless defined($expected);    # default value
+	# Be backwards-compatible
+	if (defined $params and ref $params eq '')
+	{
+		$params = {
+			expected => $params,
+			timeout => 180
+		};
+	}
+
+	$params->{expected} = 't' unless defined($params->{expected});
+	$params->{timeout} = 180 unless defined($params->{timeout});
 
 	my $cmd = [ 'psql', '-XAt', '-c', $query, '-d', $self->connstr($dbname) ];
 	my ($stdout, $stderr);
-	my $max_attempts = 180 * 10;
+	my $max_attempts = $params->{timeout} * 10;
 	my $attempts     = 0;
 
 	while ($attempts < $max_attempts)
@@ -1656,7 +1676,7 @@ sub poll_query_until
 		chomp($stdout);
 		$stdout =~ s/\r//g if $TestLib::windows_os;
 
-		if ($stdout eq $expected)
+		if ($stdout eq $params->{expected})
 		{
 			return 1;
 		}
@@ -1674,7 +1694,7 @@ sub poll_query_until
 	diag qq(poll_query_until timed out executing this query:
 $query
 expecting this output:
-$expected
+$params->{expected}
 last actual query output:
 $stdout
 with stderr:
diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl
index 526a3481fb..013d3d5b0c 100644
--- a/src/test/recovery/t/011_crash_recovery.pl
+++ b/src/test/recovery/t/011_crash_recovery.pl
@@ -6,6 +6,7 @@ use warnings;
 use PostgresNode;
 use TestLib;
 use Test::More;
+use File::Path qw(rmtree);
 use Config;
 if ($Config{osname} eq 'MSWin32')
 {
@@ -15,7 +16,7 @@ if ($Config{osname} eq 'MSWin32')
 }
 else
 {
-	plan tests => 3;
+	plan tests => 5;
 }
 
 my $node = get_new_node('master');
@@ -66,3 +67,152 @@ is($node->safe_psql('postgres', qq[SELECT txid_status('$xid');]),
 	'aborted', 'xid is aborted after crash');
 
 $tx->kill_kill;
+
+# TEST 4
+#
+# Ensure that a missing tablespace directory during crash recovery on
+# a standby is handled correctly.  The standby should finish crash
+# recovery successfully because a matching drop database record is
+# found in the WAL.  The following scenarios are covered:
+#
+# 1. Create a database against a user-defined tablespace then drop the
+#    tablespace.
+#
+# 2. Move a database from source tablespace to target tablespace then
+#    drop the source tablespace.
+#
+# 3. Create a database from another database as template then drop the
+#    template database.
+
+my $node_master = get_new_node('master2');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Create tablespace
+my $dropme_ts_master = TestLib::tempdir;
+$dropme_ts_master = TestLib::perl2host($dropme_ts_master);
+my $source_ts_master = TestLib::tempdir;
+$source_ts_master = TestLib::perl2host($source_ts_master);
+my $target_ts_master = TestLib::tempdir;
+$target_ts_master = TestLib::perl2host($target_ts_master);
+
+$node_master->safe_psql('postgres',
+						qq[CREATE TABLESPACE dropme_ts location '$dropme_ts_master';
+						   CREATE TABLESPACE source_ts location '$source_ts_master';
+						   CREATE TABLESPACE target_ts location '$target_ts_master';
+						   CREATE DATABASE template_db IS_TEMPLATE = true;]);
+
+my $dropme_ts_standby = TestLib::tempdir;
+$dropme_ts_standby = TestLib::perl2host($dropme_ts_standby);
+my $source_ts_standby = TestLib::tempdir;
+$source_ts_standby = TestLib::perl2host($source_ts_standby);
+my $target_ts_standby = TestLib::tempdir;
+$target_ts_standby = TestLib::perl2host($target_ts_standby);
+
+# Take backup
+my $backup_name = 'my_backup';
+my $ts_mapping = "$dropme_ts_master=$dropme_ts_standby," .
+  "$source_ts_master=$source_ts_standby," .
+  "$target_ts_master=$target_ts_standby";
+$node_master->backup($backup_name, tablespace_mappings => $ts_mapping);
+
+my $node_standby = get_new_node('standby2');
+$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# Make sure connection is made
+$node_master->poll_query_until(
+	'postgres', 'SELECT count(*) = 1 FROM pg_stat_replication');
+
+# Make sure to perform restartpoint after tablespace creation
+$node_master->wait_for_catchup($node_standby, 'replay',
+							   $node_master->lsn('replay'));
+$node_standby->safe_psql('postgres', 'CHECKPOINT');
+
+# Do immediate shutdown just after a sequence of CREATE DATABASE / DROP
+# DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records
+# to be applied to already-removed directories.
+$node_master->safe_psql('postgres',
+						q[CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts;
+						  CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts;
+						  CREATE DATABASE moveme_db TABLESPACE source_ts;
+						  ALTER DATABASE moveme_db SET TABLESPACE target_ts;
+						  DROP DATABASE dropme_db1;
+						  CREATE DATABASE newdb TEMPLATE template_db;
+						  ALTER DATABASE template_db IS_TEMPLATE = false;
+						  DROP TABLESPACE source_ts;
+						  DROP DATABASE dropme_db2;
+						  DROP TABLESPACE dropme_ts;
+						  DROP DATABASE template_db;]);
+$node_master->wait_for_catchup($node_standby, 'replay',
+							   $node_master->lsn('replay'));
+$node_standby->stop('immediate');
+
+# Should restart ignoring directory creation error.
+is($node_standby->start(fail_ok => 1), 1);
+
+# TEST 5
+#
+# Ensure that a missing tablespace directory during create database
+# replay immediately causes panic if the standby has already reached
+# consistent state (archive recovery is in progress).
+
+$node_master = get_new_node('master3');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Create tablespace
+my $ts_master = TestLib::tempdir;
+$ts_master = TestLib::perl2host($ts_master);
+$node_master->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$ts_master'");
+$node_master->safe_psql('postgres', "CREATE DATABASE db1 TABLESPACE ts1");
+
+my $ts_standby = TestLib::tempdir("standby");
+$ts_standby = TestLib::perl2host($ts_standby);
+
+# Take backup
+$backup_name = 'my_backup';
+$node_master->backup($backup_name,
+					 tablespace_mappings =>
+					   "$ts_master=$ts_standby");
+$node_standby = get_new_node('standby3');
+$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# Make sure standby reached consistency and starts accepting connections
+$node_standby->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Remove standby tablespace directory so it will be missing when
+# replay resumes.
+#
+# The tablespace mapping is lost when the standby node is initialized
+# from basebackup because RecursiveCopy::copypath creates a new temp
+# directory for each tablespace symlink found in backup.  We must
+# obtain the correct tablespace directory by querying standby.
+$ts_standby = $node_standby->safe_psql(
+	'postgres',
+	"select pg_tablespace_location(oid) from pg_tablespace where spcname = 'ts1'");
+rmtree($ts_standby);
+
+# Create a database in the tablespace and a table in default tablespace
+$node_master->safe_psql('postgres',
+						q[CREATE TABLE should_not_replay_insertion(a int);
+						  CREATE DATABASE db2 WITH TABLESPACE ts1;
+						  INSERT INTO should_not_replay_insertion VALUES (1);]);
+
+# Standby should fail and should not silently skip replaying the wal
+if ($node_master->poll_query_until(
+		'postgres',
+		'SELECT count(*) = 0 FROM pg_stat_replication',
+		timeout => 5) == 1)
+{
+	pass('standby failed as expected');
+	# We know that the standby has failed.  Setting its pid to
+	# undefined avoids error when PostgresNode module tries to stop the
+	# standby node as part of tear_down sequence.
+	$node_standby->{_pid} = undef;
+}
+else
+{
+	fail('standby did not fail within 5 seconds');
+}
-- 
2.20.1

>From 47ee0330541f22cfd934c59cc1ae6df34b08eea6 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Thu, 9 Jan 2020 17:54:40 -0300
Subject: [PATCH v8 3/3] Fix replay of create database records on standby

Crash recovery on standby may encounter missing directories when
replaying create database WAL records.  Prior to this patch, the
standby would fail to recover in such a case.  However, the
directories could be legitimately missing.  Consider a sequence of WAL
records as follows:

    CREATE DATABASE
    DROP DATABASE
    DROP TABLESPACE

If, after replaying the last WAL record and removing the tablespace
directory, the standby crashes and has to replay the create database
record again, the crash recovery must be able to move on.

This patch adds a mechanism similar to the invalid page hash table, to track
missing directories during crash recovery.  If all the missing
directory references are matched with corresponding drop records at
the end of crash recovery, the standby can safely enter archive
recovery.

Bug identified by Paul.

Authored by Paul, Kyotaro and Asim R P.
---
 src/backend/access/rmgrdesc/dbasedesc.c |  16 ++-
 src/backend/access/transam/xlog.c       |   6 ++
 src/backend/access/transam/xlogutils.c  | 130 ++++++++++++++++++++++++
 src/backend/commands/dbcommands.c       |  54 ++++++++++
 src/backend/commands/tablespace.c       |   3 +
 src/include/access/xlogutils.h          |   4 +
 src/include/commands/dbcommands.h       |   2 +
 7 files changed, 210 insertions(+), 5 deletions(-)

diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c
index 73d2a4ca34..f7117873d7 100644
--- a/src/backend/access/rmgrdesc/dbasedesc.c
+++ b/src/backend/access/rmgrdesc/dbasedesc.c
@@ -23,14 +23,17 @@ dbase_desc(StringInfo buf, XLogReaderState *record)
 {
 	char	   *rec = XLogRecGetData(record);
 	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+	char		*dbpath1, *dbpath2;
 
 	if (info == XLOG_DBASE_CREATE)
 	{
 		xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) rec;
 
-		appendStringInfo(buf, "copy dir %u/%u to %u/%u",
-						 xlrec->src_tablespace_id, xlrec->src_db_id,
-						 xlrec->tablespace_id, xlrec->db_id);
+		dbpath1 = GetDatabasePath(xlrec->src_db_id,  xlrec->src_tablespace_id);
+		dbpath2 = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
+		appendStringInfo(buf, "copy dir %s to %s", dbpath1, dbpath2);
+		pfree(dbpath2);
+		pfree(dbpath1);
 	}
 	else if (info == XLOG_DBASE_DROP)
 	{
@@ -39,8 +42,11 @@ dbase_desc(StringInfo buf, XLogReaderState *record)
 
 		appendStringInfo(buf, "dir");
 		for (i = 0; i < xlrec->ntablespaces; i++)
-			appendStringInfo(buf, " %u/%u",
-							 xlrec->tablespace_ids[i], xlrec->db_id);
+		{
+			dbpath1 = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]);
+			appendStringInfo(buf,  "%s", dbpath1);
+			pfree(dbpath1);
+		}
 	}
 }
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7f4f784c0e..d97e48f369 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7890,6 +7890,12 @@ CheckRecoveryConsistency(void)
 		 */
 		XLogCheckInvalidPages();
 
+		/*
+		 * Check if the XLOG sequence contained any unresolved references to
+		 * missing directories.
+		 */
+		XLogCheckMissingDirs();
+
 		reachedConsistency = true;
 		ereport(LOG,
 				(errmsg("consistent recovery state reached at %X/%X",
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index b55c383370..6c2dd5aba1 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -56,6 +56,136 @@ typedef struct xl_invalid_page
 
 static HTAB *invalid_page_tab = NULL;
 
+/*
+ * If a create database WAL record is being replayed more than once during
+ * crash recovery on a standby, it is possible that either the tablespace
+ * directory or the template database directory is missing.  This happens when
+ * the directories are removed by replay of subsequent drop records.  Note
+ * that this problem happens only on standby and not on master.  On master, a
+ * checkpoint is created at the end of create database operation. On standby,
+ * however, such a strategy (creating restart points during replay) is not
+ * viable because it will slow down WAL replay.
+ *
+ * The alternative is to track references to each missing directory
+ * encountered when performing crash recovery in the following hash table.
+ * Similar to invalid page table above, the expectation is that each missing
+ * directory entry should be matched with a drop database or drop tablespace
+ * WAL record by the end of crash recovery.
+ */
+typedef struct xl_missing_dir_key
+{
+	Oid spcNode;
+	Oid dbNode;
+} xl_missing_dir_key;
+
+typedef struct xl_missing_dir
+{
+	xl_missing_dir_key key;
+	char path[MAXPGPATH];
+} xl_missing_dir;
+
+static HTAB *missing_dir_tab = NULL;
+
+void
+XLogLogMissingDir(Oid spcNode, Oid dbNode, char *path)
+{
+	xl_missing_dir_key key;
+	bool found;
+	xl_missing_dir *entry;
+
+	/*
+	 * Database OID may be invalid but tablespace OID must be valid.  If
+	 * dbNode is InvalidOid, we are logging a missing tablespace directory,
+	 * otherwise we are logging a missing database directory.
+	 */
+	Assert(OidIsValid(spcNode));
+
+	if (reachedConsistency)
+		elog(PANIC, "cannot find directory %s tablespace %d database %d",
+			 path, spcNode, dbNode);
+
+	if (missing_dir_tab == NULL)
+	{
+		/* create hash table when first needed */
+		HASHCTL		ctl;
+
+		memset(&ctl, 0, sizeof(ctl));
+		ctl.keysize = sizeof(xl_missing_dir_key);
+		ctl.entrysize = sizeof(xl_missing_dir);
+
+		missing_dir_tab = hash_create("XLOG missing directory table",
+									   100,
+									   &ctl,
+									   HASH_ELEM | HASH_BLOBS);
+	}
+
+	key.spcNode = spcNode;
+	key.dbNode = dbNode;
+
+	entry = hash_search(missing_dir_tab, &key, HASH_ENTER, &found);
+
+	if (found)
+		elog(DEBUG2, "missing directory %s tablespace %d database %d already exists: %s",
+			 path, spcNode, dbNode, entry->path);
+	else
+	{
+		strlcpy(entry->path, path, sizeof(entry->path));
+		elog(DEBUG2, "logged missing dir %s tablespace %d database %d",
+			 path, spcNode, dbNode);
+	}
+}
+
+void
+XLogForgetMissingDir(Oid spcNode, Oid dbNode, char *path)
+{
+	xl_missing_dir_key key;
+
+	key.spcNode = spcNode;
+	key.dbNode = dbNode;
+
+	/* Database OID may be invalid but tablespace OID must be valid. */
+	Assert(OidIsValid(spcNode));
+
+	if (missing_dir_tab == NULL)
+		return;
+
+	if (hash_search(missing_dir_tab, &key, HASH_REMOVE, NULL) == NULL)
+		elog(DEBUG2, "dir %s tablespace %d database %d is not missing",
+			 path, spcNode, dbNode);
+	else
+		elog(DEBUG2, "forgot missing dir %s for tablespace %d database %d",
+			 path, spcNode, dbNode);
+}
+
+/*
+ * This is called at the end of crash recovery, before entering archive
+ * recovery on a standby.  PANIC if the hash table is not empty.
+ */
+void
+XLogCheckMissingDirs(void)
+{
+	HASH_SEQ_STATUS status;
+	xl_missing_dir *hentry;
+	bool		foundone = false;
+
+	if (missing_dir_tab == NULL)
+		return;					/* nothing to do */
+
+	hash_seq_init(&status, missing_dir_tab);
+
+	while ((hentry = (xl_missing_dir *) hash_seq_search(&status)) != NULL)
+	{
+		elog(WARNING, "missing directory \"%s\" tablespace %d database %d",
+			 hentry->path, hentry->key.spcNode, hentry->key.dbNode);
+		foundone = true;
+	}
+
+	if (foundone)
+		elog(PANIC, "WAL contains references to missing directories");
+
+	hash_destroy(missing_dir_tab);
+	missing_dir_tab = NULL;
+}
 
 /* Report a reference to an invalid page */
 static void
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 367c30adb0..6d6668e4f8 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -46,6 +46,7 @@
 #include "commands/defrem.h"
 #include "commands/seclabel.h"
 #include "commands/tablespace.h"
+#include "common/file_perm.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pgstat.h"
@@ -2185,7 +2186,9 @@ dbase_redo(XLogReaderState *record)
 		xl_dbase_create_rec *xlrec = (xl_dbase_create_rec *) XLogRecGetData(record);
 		char	   *src_path;
 		char	   *dst_path;
+		char	   *parent_path;
 		struct stat st;
+		bool	    skip = false;
 
 		src_path = GetDatabasePath(xlrec->src_db_id, xlrec->src_tablespace_id);
 		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
@@ -2203,6 +2206,54 @@ dbase_redo(XLogReaderState *record)
 						(errmsg("some useless files may be left behind in old database directory \"%s\"",
 								dst_path)));
 		}
+		else
+		{
+			/*
+			 * It is possible that drop tablespace record appearing later in
+			 * the WAL has already been replayed.  That means we are replaying
+			 * the create database record a second time, as part of crash
+			 * recovery.  In that case, the tablespace directory has already
+			 * been removed and the create database operation cannot be
+			 * replayed.  We should skip the replay but remember the missing
+			 * tablespace directory, to be matched with a drop tablespace
+			 * record later.
+			 */
+			parent_path = pstrdup(dst_path);
+			get_parent_directory(parent_path);
+			if (!(stat(parent_path, &st) == 0 && S_ISDIR(st.st_mode)))
+			{
+				XLogLogMissingDir(xlrec->tablespace_id, InvalidOid, dst_path);
+				skip = true;
+				ereport(WARNING,
+						(errmsg("skipping create database WAL record"),
+						 errdetail("Target tablespace \"%s\" not found. We "
+								   "expect to encounter a WAL record that "
+								   "removes this directory before reaching "
+								   "consistent state.", parent_path)));
+			}
+			pfree(parent_path);
+		}
+
+		/*
+		 * Source directory may be missing.  E.g. the template database used
+		 * for creating this database may have been dropped, due to reasons
+		 * noted above.  Moving a database from one tablespace may also be a
+		 * partner in the crime.
+		 */
+		if (!(stat(src_path, &st) == 0 && S_ISDIR(st.st_mode)))
+		{
+			XLogLogMissingDir(xlrec->src_tablespace_id, xlrec->src_db_id, src_path);
+			skip = true;
+			ereport(WARNING,
+					(errmsg("skipping create database WAL record"),
+					 errdetail("Source database \"%s\" not found. We expect "
+							   "to encounter a WAL record that removes this "
+							   "directory before reaching consistent state.",
+							   src_path)));
+		}
+
+		if (skip)
+			return;
 
 		/*
 		 * Force dirty buffers out to disk, to ensure source database is
@@ -2260,6 +2311,9 @@ dbase_redo(XLogReaderState *record)
 				ereport(WARNING,
 						(errmsg("some useless files may be left behind in old database directory \"%s\"",
 								dst_path)));
+
+			XLogForgetMissingDir(xlrec->tablespace_ids[i], xlrec->db_id, dst_path);
+
 			pfree(dst_path);
 		}
 
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 051478057f..33407dceeb 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -58,6 +58,7 @@
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
+#include "access/xlogutils.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
 #include "catalog/indexing.h"
@@ -1516,6 +1517,8 @@ tblspc_redo(XLogReaderState *record)
 	{
 		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
 
+		XLogForgetMissingDir(xlrec->ts_id, InvalidOid, "");
+
 		/*
 		 * If we issued a WAL record for a drop tablespace it implies that
 		 * there were no files in it at all when the DROP was done. That means
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 5181a077d9..4106735006 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -23,6 +23,10 @@ extern void XLogDropDatabase(Oid dbid);
 extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
 								 BlockNumber nblocks);
 
+extern void XLogLogMissingDir(Oid spcNode, Oid dbNode, char *path);
+extern void XLogForgetMissingDir(Oid spcNode, Oid dbNode, char *path);
+extern void XLogCheckMissingDirs(void);
+
 /* Result codes for XLogReadBufferForRedo[Extended] */
 typedef enum
 {
diff --git a/src/include/commands/dbcommands.h b/src/include/commands/dbcommands.h
index f8f6d5ffd0..b71b400e70 100644
--- a/src/include/commands/dbcommands.h
+++ b/src/include/commands/dbcommands.h
@@ -19,6 +19,8 @@
 #include "lib/stringinfo.h"
 #include "nodes/parsenodes.h"
 
+extern void CheckMissingDirs4DbaseRedo(void);
+
 extern Oid	createdb(ParseState *pstate, const CreatedbStmt *stmt);
 extern void dropdb(const char *dbname, bool missing_ok, bool force);
 extern void DropDatabase(ParseState *pstate, DropdbStmt *stmt);
-- 
2.20.1

Reply via email to