On Wed, May 4, 2022 at 8:53 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> Got some off-list clues: that's just distracting Perl cleanup noise
> after something else went wrong (thanks Robert), and now I'm testing a
> theory from Andres that we're missing a barrier on the redo side when
> replaying XLOG_DBASE_CREATE_FILE_COPY.  More soon.

Yeah, looks like that was the explanation.  Presumably in older
releases, recovery can fail with EACCES here, and since commit
e2f0f8ed we get ENOENT, because someone's got an unlinked file open,
and ReadDir() can still see it.  (I've wondered before if ReadDir()
should also hide zombie Windows directory entries, but that's kinda
independent and would only get us one step further, a later rmdir()
would still fail.)  Adding the barrier fixes the problem.  Assuming no
objections or CI failures show up, I'll consider pushing the first two
patches tomorrow.
From 1c5e04e7704ab9d5bd2559338ce1b6bd8a397e51 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Sat, 2 Apr 2022 17:05:39 +1300
Subject: [PATCH v4 1/3] Rethink PROCSIGNAL_BARRIER_SMGRRELEASE.

With sufficiently bad luck, it was possible for IssuePendingWritebacks()
to reopen files after we'd processed PROCSIGNAL_BARRIER_SMGRRELEASE and
before the file was unlinked by some other backend, leading to potential
errors on Windows.

Defend against that by closing md.c's segments, instead of just closing
fd.c's descriptors, and then teaching smgrwriteback() not to open files
that aren't already open.

This fixes an overlooked edge case for commit 4eb21763.

Reported-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Discussion: https://postgr.es/m/20220209220004.kb3dgtn2x2k2gtdm%40alap3.anarazel.de
---
 src/backend/storage/smgr/md.c   | 22 +++++++++-------
 src/backend/storage/smgr/smgr.c | 45 +++++++++++++++++++++++++++------
 src/include/storage/md.h        |  1 -
 src/include/storage/smgr.h      |  2 ++
 4 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 286dd3f755..41fa73087b 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -116,6 +116,8 @@ static MemoryContext MdCxt;		/* context for all MdfdVec objects */
  * mdnblocks().
  */
 #define EXTENSION_DONT_CHECK_SIZE	(1 << 4)
+/* don't try to open a segment if it is not already open */
+#define EXTENSION_DONT_OPEN			(1 << 5)
 
 
 /* local routines */
@@ -551,12 +553,6 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
 	}
 }
 
-void
-mdrelease(void)
-{
-	closeAllVfds();
-}
-
 /*
  *	mdprefetch() -- Initiate asynchronous read of the specified block of a relation
  */
@@ -605,11 +601,14 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
 					segnum_end;
 
 		v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
-						 EXTENSION_RETURN_NULL);
+						 EXTENSION_DONT_OPEN);
 
 		/*
 		 * We might be flushing buffers of already removed relations, that's
-		 * ok, just ignore that case.
+		 * ok, just ignore that case.  If the segment file wasn't open already
+		 * (ie from a recent mdwrite()), then we don't want to re-open it, to
+		 * avoid a race with PROCSIGNAL_BARRIER_SMGRRELEASE that might leave us
+		 * with a descriptor to a file that is about to be unlinked.
 		 */
 		if (!v)
 			return;
@@ -1202,7 +1201,8 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 
 	/* some way to handle non-existent segments needs to be specified */
 	Assert(behavior &
-		   (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL));
+		   (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
+			EXTENSION_DONT_OPEN));
 
 	targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
 
@@ -1213,6 +1213,10 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 		return v;
 	}
 
+	/* The caller only wants the segment if we already had it open. */
+	if (behavior & EXTENSION_DONT_OPEN)
+		return NULL;
+
 	/*
 	 * The target segment is not yet open. Iterate over all the segments
 	 * between the last opened and the target segment. This way missing
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 2c7a2b2857..9cbfaa86cb 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -41,7 +41,6 @@ typedef struct f_smgr
 {
 	void		(*smgr_init) (void);	/* may be NULL */
 	void		(*smgr_shutdown) (void);	/* may be NULL */
-	void		(*smgr_release) (void); /* may be NULL */
 	void		(*smgr_open) (SMgrRelation reln);
 	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
 	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
@@ -70,7 +69,6 @@ static const f_smgr smgrsw[] = {
 	{
 		.smgr_init = mdinit,
 		.smgr_shutdown = NULL,
-		.smgr_release = mdrelease,
 		.smgr_open = mdopen,
 		.smgr_close = mdclose,
 		.smgr_create = mdcreate,
@@ -281,6 +279,42 @@ smgrclose(SMgrRelation reln)
 		*owner = NULL;
 }
 
+/*
+ *	smgrrelease() -- Release all resources used by this object.
+ *
+ *	The object remains valid.
+ */
+void
+smgrrelease(SMgrRelation reln)
+{
+	for (int i = 0; i <= MAX_FORKNUM; ++i)
+	{
+		smgrsw[reln->smgr_which].smgr_close(reln, i);
+		reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
+	}
+}
+
+/*
+ *	smgrreleaseall() -- Release resources used by all objects.
+ *
+ *	This is called for PROCSIGNAL_BARRIER_SMGRRELEASE.
+ */
+void
+smgrreleaseall(void)
+{
+	HASH_SEQ_STATUS status;
+	SMgrRelation reln;
+
+	/* Nothing to do if hashtable not set up */
+	if (SMgrRelationHash == NULL)
+		return;
+
+	hash_seq_init(&status, SMgrRelationHash);
+
+	while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL)
+		smgrrelease(reln);
+}
+
 /*
  *	smgrcloseall() -- Close all existing SMgrRelation objects.
  */
@@ -698,11 +732,6 @@ AtEOXact_SMgr(void)
 bool
 ProcessBarrierSmgrRelease(void)
 {
-	for (int i = 0; i < NSmgr; i++)
-	{
-		if (smgrsw[i].smgr_release)
-			smgrsw[i].smgr_release();
-	}
-
+	smgrreleaseall();
 	return true;
 }
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index 6e46d8d96a..ffffa40db7 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -23,7 +23,6 @@
 extern void mdinit(void);
 extern void mdopen(SMgrRelation reln);
 extern void mdclose(SMgrRelation reln, ForkNumber forknum);
-extern void mdrelease(void);
 extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
 extern void mdunlink(RelFileNodeBackend rnode, ForkNumber forknum, bool isRedo);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 8e3ef92cda..6b63c60fbd 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -85,6 +85,8 @@ extern void smgrclearowner(SMgrRelation *owner, SMgrRelation reln);
 extern void smgrclose(SMgrRelation reln);
 extern void smgrcloseall(void);
 extern void smgrclosenode(RelFileNodeBackend rnode);
+extern void smgrrelease(SMgrRelation reln);
+extern void smgrreleaseall(void);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
-- 
2.36.0

From a34d903fd2614bcdd089c91202333db62982c3ba Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 21 Feb 2022 16:42:16 -0800
Subject: [PATCH v4 2/3] Fix old-fd issues using global barriers everywhere.

Commit 4eb21763 (and a later improvement, XXX) introduced a way to force
every backend to close all relation files, to fix a long-standing
Windows-only problem.

This commit extends that behavior to all operating systems, to handle
a totally different class of problem: the reuse of relfilenodes in
scenarios that have no other kind of cache invalidation to prevent
file descriptor mix-ups.

In all releases, the problem could happen when you moved a database to
another tablespace and then back again.  For now, no back-patch as the
infrastructure required is invasive.  In 15, since commit aa010514, it
could happen when using CREATE DATABASE with a user-supplied OID,
particularly as part of pg_upgrade.

Author: Andres Freund <andres@anarazel.de>
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/20220209220004.kb3dgtn2x2k2gtdm%40alap3.anarazel.de
---
 src/backend/commands/dbcommands.c            |   9 +-
 src/backend/commands/tablespace.c            |  11 +-
 src/include/pg_config_manual.h               |  11 -
 src/test/recovery/Makefile                   |   2 +-
 src/test/recovery/t/032_relfilenode_reuse.pl | 233 +++++++++++++++++++
 5 files changed, 241 insertions(+), 25 deletions(-)
 create mode 100644 src/test/recovery/t/032_relfilenode_reuse.pl

diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 9d0f83cde3..6da58437c5 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1687,10 +1687,8 @@ dropdb(const char *dbname, bool missing_ok, bool force)
 	 */
 	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
 
-#if defined(USE_BARRIER_SMGRRELEASE)
 	/* Close all smgr fds in all backends. */
 	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
-#endif
 
 	/*
 	 * Remove all tablespace subdirs belonging to the database.
@@ -1940,10 +1938,8 @@ movedb(const char *dbname, const char *tblspcname)
 	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
 					  | CHECKPOINT_FLUSH_ALL);
 
-#if defined(USE_BARRIER_SMGRRELEASE)
 	/* Close all smgr fds in all backends. */
 	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
-#endif
 
 	/*
 	 * Now drop all buffers holding data of the target database; they should
@@ -3054,6 +3050,9 @@ dbase_redo(XLogReaderState *record)
 		 */
 		FlushDatabaseBuffers(xlrec->src_db_id);
 
+		/* Close all smgr fds in all backends. */
+		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+
 		/*
 		 * Copy this subdirectory to the new location
 		 *
@@ -3111,10 +3110,8 @@ dbase_redo(XLogReaderState *record)
 		/* Clean out the xlog relcache too */
 		XLogDropDatabase(xlrec->db_id);
 
-#if defined(USE_BARRIER_SMGRRELEASE)
 		/* Close all sgmr fds in all backends. */
 		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
-#endif
 
 		for (i = 0; i < xlrec->ntablespaces; i++)
 		{
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index 40514ab550..822d65287e 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -548,11 +548,10 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 		 * use a global barrier to ask all backends to close all files, and
 		 * wait until they're finished.
 		 */
-#if defined(USE_BARRIER_SMGRRELEASE)
 		LWLockRelease(TablespaceCreateLock);
 		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
 		LWLockAcquire(TablespaceCreateLock, LW_EXCLUSIVE);
-#endif
+
 		/* And now try again. */
 		if (!destroy_tablespace_directories(tablespaceoid, false))
 		{
@@ -1574,6 +1573,9 @@ tblspc_redo(XLogReaderState *record)
 	{
 		xl_tblspc_drop_rec *xlrec = (xl_tblspc_drop_rec *) XLogRecGetData(record);
 
+		/* Close all smgr fds in all backends. */
+		WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
+
 		/*
 		 * If we issued a WAL record for a drop tablespace it implies that
 		 * there were no files in it at all when the DROP was done. That means
@@ -1591,11 +1593,6 @@ tblspc_redo(XLogReaderState *record)
 		 */
 		if (!destroy_tablespace_directories(xlrec->ts_id, true))
 		{
-#if defined(USE_BARRIER_SMGRRELEASE)
-			/* Close all smgr fds in all backends. */
-			WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
-#endif
-
 			ResolveRecoveryConflictWithTablespace(xlrec->ts_id);
 
 			/*
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index 84ce5a4a5d..8d2e3e3a57 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -152,17 +152,6 @@
 #define EXEC_BACKEND
 #endif
 
-/*
- * If USE_BARRIER_SMGRRELEASE is defined, certain code paths that unlink
- * directories will ask other backends to close all smgr file descriptors.
- * This is enabled on Windows, because otherwise unlinked but still open files
- * can prevent rmdir(containing_directory) from succeeding.  On other
- * platforms, it can be defined to exercise those code paths.
- */
-#if defined(WIN32)
-#define USE_BARRIER_SMGRRELEASE
-#endif
-
 /*
  * Define this if your operating system supports link()
  */
diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile
index da5b9ff397..c47eee273b 100644
--- a/src/test/recovery/Makefile
+++ b/src/test/recovery/Makefile
@@ -9,7 +9,7 @@
 #
 #-------------------------------------------------------------------------
 
-EXTRA_INSTALL=contrib/test_decoding
+EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm
 
 subdir = src/test/recovery
 top_builddir = ../../..
diff --git a/src/test/recovery/t/032_relfilenode_reuse.pl b/src/test/recovery/t/032_relfilenode_reuse.pl
new file mode 100644
index 0000000000..22d8e85614
--- /dev/null
+++ b/src/test/recovery/t/032_relfilenode_reuse.pl
@@ -0,0 +1,233 @@
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Basename;
+
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf('postgresql.conf', q[
+allow_in_place_tablespaces = true
+log_connections=on
+# to avoid "repairing" corruption
+full_page_writes=off
+log_min_messages=debug2
+autovacuum_naptime=1s
+shared_buffers=1MB
+]);
+$node_primary->start;
+
+
+# Create streaming standby linking to primary
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby->start;
+
+# To avoid hanging while expecting some specific input from a psql
+# instance being driven by us, add a timeout high enough that it
+# should never trigger even on very slow machines, unless something
+# is really wrong.
+my $psql_timeout = IPC::Run::timer(300);
+
+my %psql_primary = (stdin => '', stdout => '', stderr => '');
+$psql_primary{run} = IPC::Run::start(
+	[ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ],
+	'<',
+	\$psql_primary{stdin},
+	'>',
+	\$psql_primary{stdout},
+	'2>',
+	\$psql_primary{stderr},
+	$psql_timeout);
+
+my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => '');
+$psql_standby{run} = IPC::Run::start(
+	[ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ],
+	'<',
+	\$psql_standby{stdin},
+	'>',
+	\$psql_standby{stdout},
+	'2>',
+	\$psql_standby{stderr},
+	$psql_timeout);
+
+
+# Create template database with a table that we'll update, to trigger dirty
+# rows. Using a template database + preexisting rows makes it a bit easier to
+# reproduce, because there's no cache invalidations generated.
+
+$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db_template OID = 50000;");
+$node_primary->safe_psql('conflict_db_template', q[
+    CREATE TABLE large(id serial primary key, dataa text, datab text);
+    INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);]);
+$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
+
+$node_primary->safe_psql('postgres', q[
+    CREATE EXTENSION pg_prewarm;
+    CREATE TABLE replace_sb(data text);
+    INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);]);
+
+# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files
+send_query_and_wait(
+	\%psql_primary,
+	q[BEGIN;],
+	qr/BEGIN/m);
+send_query_and_wait(
+	\%psql_standby,
+	q[BEGIN;],
+	qr/BEGIN/m);
+
+# Cause lots of dirty rows in shared_buffers
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 1;");
+
+# Now do a bunch of work in another database. That will end up needing to
+# write back dirty data from the previous step, opening the relevant file
+# descriptors
+cause_eviction(\%psql_primary, \%psql_standby);
+
+# drop and recreate database
+$node_primary->safe_psql('postgres', "DROP DATABASE conflict_db;");
+$node_primary->safe_psql('postgres', "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
+
+verify($node_primary, $node_standby, 1,
+	   "initial contents as expected");
+
+# Again cause lots of dirty rows in shared_buffers, but use a different update
+# value so we can check everything is OK
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 2;");
+
+# Again cause a lot of IO. That'll again write back dirty data, but uses (XXX
+# adjust after bugfix) the already opened file descriptor.
+# FIXME
+cause_eviction(\%psql_primary, \%psql_standby);
+
+verify($node_primary, $node_standby, 2,
+	   "update to reused relfilenode (due to DB oid conflict) is not lost");
+
+
+$node_primary->safe_psql('conflict_db', "VACUUM FULL large;");
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 3;");
+
+verify($node_primary, $node_standby, 3,
+	   "restored contents as expected");
+
+# Test for old filehandles after moving a database in / out of tablespace
+$node_primary->safe_psql('postgres', q[CREATE TABLESPACE test_tablespace LOCATION '']);
+
+# cause dirty buffers
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 4;");
+# cause files to be opened in backend in other database
+cause_eviction(\%psql_primary, \%psql_standby);
+
+# move database back / forth
+$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
+$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE pg_default');
+
+# cause dirty buffers
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 5;");
+cause_eviction(\%psql_primary, \%psql_standby);
+
+verify($node_primary, $node_standby, 5,
+	   "post move contents as expected");
+
+$node_primary->safe_psql('postgres', 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
+
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 7;");
+cause_eviction(\%psql_primary, \%psql_standby);
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 8;");
+$node_primary->safe_psql('postgres', 'DROP DATABASE conflict_db');
+$node_primary->safe_psql('postgres', 'DROP TABLESPACE test_tablespace');
+
+$node_primary->safe_psql('postgres', 'REINDEX TABLE pg_database');
+
+
+# explicitly shut down psql instances gracefully - to avoid hangs
+# or worse on windows
+$psql_primary{stdin} .= "\\q\n";
+$psql_primary{run}->finish;
+$psql_standby{stdin} .= "\\q\n";
+$psql_standby{run}->finish;
+
+$node_primary->stop();
+$node_standby->stop();
+
+# Make sure that there weren't crashes during shutdown
+
+command_like([ 'pg_controldata', $node_primary->data_dir ],
+	qr/Database cluster state:\s+shut down\n/, 'primary shut down ok');
+command_like([ 'pg_controldata', $node_standby->data_dir ],
+	qr/Database cluster state:\s+shut down in recovery\n/, 'standby shut down ok');
+done_testing();
+
+sub verify
+{
+	my ($primary, $standby, $counter, $message) = @_;
+
+	my $query = "SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10";
+	is($primary->safe_psql('conflict_db', $query),
+	   "$counter|4000",
+	   "primary: $message");
+
+	$primary->wait_for_catchup($standby);
+	is($standby->safe_psql('conflict_db', $query),
+	   "$counter|4000",
+	   "standby: $message");
+}
+
+sub cause_eviction
+{
+	my ($psql_primary, $psql_standby) = @_;
+
+	send_query_and_wait(
+		$psql_primary,
+		q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
+		qr/warmed_buffers/m);
+
+	send_query_and_wait(
+		$psql_standby,
+		q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
+		qr/warmed_buffers/m);
+}
+
+# Send query, wait until string matches
+sub send_query_and_wait
+{
+	my ($psql, $query, $untl) = @_;
+	my $ret;
+
+	# send query
+	$$psql{stdin} .= $query;
+	$$psql{stdin} .= "\n";
+
+	# wait for query results
+	$$psql{run}->pump_nb();
+	while (1)
+	{
+		last if $$psql{stdout} =~ /$untl/;
+
+		if ($psql_timeout->is_expired)
+		{
+			BAIL_OUT("aborting wait: program timed out\n"
+				  . "stream contents: >>$$psql{stdout}<<\n"
+				  . "pattern searched for: $untl\n");
+			return 0;
+		}
+		if (not $$psql{run}->pumpable())
+		{
+			BAIL_OUT("aborting wait: program died\n"
+				  . "stream contents: >>$$psql{stdout}<<\n"
+				  . "pattern searched for: $untl\n");
+			return 0;
+		}
+		$$psql{run}->pump();
+	}
+
+	$$psql{stdout} = '';
+
+	return 1;
+}
-- 
2.36.0

Reply via email to