On Thu, Jul 02, 2020 at 05:25:21PM -0400, Tom Lane wrote:
> I guess I'm completely confused about the purpose of these patches.
> Far from coping with the situation of an unseekable file, they appear
> to change pg_restore so that it fails altogether if it can't seek
> its input file.  Why would we want to do this?

I'm not sure where the "fails altogether if it can't seek" is. The
"Skip tables in pg_restore" patch retains the old fread() logic. The
--disable-seeking stuff was just to support tests, and thanks to
help from Justin Pryzby the tests no longer require it. I've attached
the updated patch set.

Note that this still shouldn't be merged because of Justin's bug report
in 20200706050129.gw4...@telsasoft.com which is unrelated to this change
but will leave you with flaky CI until it's fixed.

-- 
David Gilman  :DG<
https://gilslotd.com
>From a671dc328a7defa403ceb46b5107ed4ee5f53103 Mon Sep 17 00:00:00 2001
From: David Gilman <davidgilm...@gmail.com>
Date: Wed, 20 May 2020 22:49:28 -0400
Subject: [PATCH 1/5] Scan all TOCs when restoring a custom dump file without
 offsets

TOC requests are not guaranteed to come in disk order. If the custom
dump file was written with data offsets, pg_restore can seek directly to
the data, making request order irrelevant. If there are no data offsets,
pg_restore would never attempt to seek backwards, even when seeking is
possible, and would be unable to find TOCs before the current read
position in the file. 548e50976 changed how pg_restore's parallel
algorithm worked at the cost of greatly increasing out-of-order TOC
requests.

This patch changes pg_restore to scan through all TOCs to service a TOC
read request when restoring a custom dump file without data offsets.
The odds of getting a successful parallel restore go way up at the cost
of a bunch of extra tiny reads when pg_restore starts up.

The pg_restore manpage now warns against running pg_dump with an
unseekable output file and suggests that if you plan on doing a parallel
restore of a custom dump, you should run pg_dump with --file.
---
 doc/src/sgml/ref/pg_restore.sgml   |  8 ++++++++
 src/bin/pg_dump/pg_backup_custom.c | 25 ++++++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml
index 232f88024f..23286bb076 100644
--- a/doc/src/sgml/ref/pg_restore.sgml
+++ b/doc/src/sgml/ref/pg_restore.sgml
@@ -279,6 +279,14 @@ PostgreSQL documentation
         jobs cannot be used together with the
         option <option>--single-transaction</option>.
        </para>
+
+       <para>
+        <application>pg_restore</application> with concurrent jobs may fail
+        when restoring a custom archive format dump written to an unseekable
+        output stream, like stdout. To allow for concurrent restoration of
+        a custom archive format dump, use <application>pg_dump</application>'s
+        <option>--file</option> option to specify an output file.
+       </para>
       </listitem>
      </varlistentry>
 
diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index 6ab122242c..a873ac3afa 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -413,6 +413,7 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 	lclTocEntry *tctx = (lclTocEntry *) te->formatData;
 	int			blkType;
 	int			id;
+	bool		initialScan = true;
 
 	if (tctx->dataState == K_OFFSET_NO_DATA)
 		return;
@@ -421,13 +422,28 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 	{
 		/*
 		 * We cannot seek directly to the desired block.  Instead, skip over
-		 * block headers until we find the one we want.  This could fail if we
-		 * are asked to restore items out-of-order.
+		 * block headers until we find the one we want.
 		 */
-		_readBlockHeader(AH, &blkType, &id);
 
-		while (blkType != EOF && id != te->dumpId)
+		for (;;)
 		{
+			_readBlockHeader(AH, &blkType, &id);
+
+			if (blkType == EOF && ctx->hasSeek && initialScan)
+			{
+				/*
+				 * This was possibly an out-of-order request. Try one extra
+				 * pass over the file to find the TOC.
+				 */
+				initialScan = false;
+				if (fseeko(AH->FH, ctx->dataStart, SEEK_SET) != 0)
+					fatal("error during file seek: %m");
+				continue;
+			}
+
+			if (blkType == EOF || id == te->dumpId)
+				break;
+
 			switch (blkType)
 			{
 				case BLK_DATA:
@@ -443,7 +459,6 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te)
 						  blkType);
 					break;
 			}
-			_readBlockHeader(AH, &blkType, &id);
 		}
 	}
 	else
-- 
2.27.0

>From 6e8294021482fb76cb17d6c150ec6acb6d0cac37 Mon Sep 17 00:00:00 2001
From: David Gilman <davidgilm...@gmail.com>
Date: Sat, 23 May 2020 10:49:33 -0400
Subject: [PATCH 2/5] Add integration test for out-of-order TOC requests in
 pg_restore

---
 src/bin/pg_dump/t/002_pg_dump.pl | 107 ++++++++++++++++++++++++++++++-
 src/test/perl/TestLib.pm         |  12 +++-
 2 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index e116235769..d99e7a5d22 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -31,9 +31,20 @@ my $tempdir_short = TestLib::tempdir_short;
 # generate a text file to run through the tests from the
 # non-text file generated by pg_dump.
 #
-# TODO: Have pg_restore actually restore to an independent
-# database and then pg_dump *that* database (or something along
-# those lines) to validate that part of the process.
+# secondary_dump_cmd is an optional key that enables a
+#
+# 	pg_dump (dump_cmd) ->
+# 	pg_restore (restore_cmd, real database) ->
+#	pg_dump (secondary_dump_cmd, SQL output (for matching))
+#
+# test process instead of the default
+#
+# 	pg_dump (dump_cmd) ->
+# 	pg_restore (restore_cmd, SQL output (for matching))
+#
+# test process. The secondary_dump database is created for
+# you and your restore_cmd and secondary_dump_cmd must read
+# and write from it.
 
 my %pgdump_runs = (
 	binary_upgrade => {
@@ -139,6 +150,25 @@ my %pgdump_runs = (
 			"$tempdir/defaults_custom_format.dump",
 		],
 	},
+	defaults_custom_format_no_seek_parallel_restore => {
+		test_key => 'defaults',
+		dump_cmd => [
+			['pg_dump', '-Fc', '-Z6', '--no-sync', 'postgres'],
+			'|',
+			['perl', '-pe', ''],  # make pg_dump's stdout unseekable
+			">$tempdir/defaults_custom_format_no_seek_parallel_restore"
+		],
+		restore_cmd => [
+			'pg_restore', '-Fc', '--jobs=2',
+			'--dbname=secondary_dump',
+			"$tempdir/defaults_custom_format_no_seek_parallel_restore",
+		],
+		secondary_dump_cmd => [
+			'pg_dump', '-Fp', '--no-sync',
+			"--file=$tempdir/defaults_custom_format_no_seek_parallel_restore.sql",
+			'secondary_dump',
+		],
+	},
 
 	# Do not use --no-sync to give test coverage for data sync.
 	defaults_dir_format => {
@@ -153,6 +183,24 @@ my %pgdump_runs = (
 			"$tempdir/defaults_dir_format",
 		],
 	},
+	defaults_dir_format_parallel => {
+		test_key => 'defaults',
+		dump_cmd => [
+			'pg_dump',                                      '-Fd',
+			'--jobs=2',                                     '--no-sync',
+			"--file=$tempdir/defaults_dir_format_parallel", 'postgres',
+		],
+		restore_cmd => [
+			'pg_restore', '-Fd',
+			'--jobs=2', '--dbname=secondary_dump',
+			"$tempdir/defaults_dir_format_parallel",
+		],
+		secondary_dump_cmd => [
+			'pg_dump', '-Fp', '--no-sync',
+			"--file=$tempdir/defaults_dir_format_parallel.sql",
+			'secondary_dump',
+		],
+	},
 
 	# Do not use --no-sync to give test coverage for data sync.
 	defaults_parallel => {
@@ -3298,6 +3346,35 @@ my %tests = (
 			%full_runs, %dump_test_schema_runs, section_pre_data => 1,
 		},
 		unlike => { exclude_dump_test_schema => 1 },
+	},
+
+	# This reliably produces the "possibly due to out-of-order restore request"
+	# when restoring with -j 2 when this was written but future changes to how
+	# pg_dump works could accidentally correctly order things as we're not really
+	# telling pg_dump how to do its on-disk layout.
+	'custom dump out-of-order' => {
+		create_sql   => '
+			CREATE TABLE dump_test.ooo_parent0 (id int primary key);
+			CREATE TABLE dump_test.ooo_child0 (
+				parent0 int references dump_test.ooo_parent0 (id)
+			);
+			CREATE TABLE dump_test.ooo_parent1 (id int primary key);
+			CREATE TABLE dump_test.ooo_child1 (
+				parent0 int references dump_test.ooo_parent1 (id)
+			);',
+		regexp => qr/^
+			 \QCREATE TABLE dump_test.ooo_child0 (\E\n
+			 \s+\Qparent0 integer\E\n
+			 \Q);\E\n/xm,
+		like => {
+			%full_runs, %dump_test_schema_runs,
+			section_pre_data => 1,
+			defaults_custom_format_no_seek => 1,
+			defaults_custom_format_no_seek_parallel_restore => 1,
+		},
+		unlike => {
+			exclude_dump_test_schema => 1,
+		},
 	});
 
 #########################################
@@ -3350,6 +3427,11 @@ foreach my $run (sort keys %pgdump_runs)
 		$num_tests++;
 	}
 
+	if ($pgdump_runs{$run}->{secondary_dump_cmd})
+	{
+		$num_tests++;
+	}
+
 	if ($pgdump_runs{$run}->{test_key})
 	{
 		$test_key = $pgdump_runs{$run}->{test_key};
@@ -3499,12 +3581,23 @@ foreach my $run (sort keys %pgdump_runs)
 	$node->command_ok(\@{ $pgdump_runs{$run}->{dump_cmd} },
 		"$run: pg_dump runs");
 
+	if ($pgdump_runs{$run}->{secondary_dump_cmd})
+	{
+		$node->safe_psql('postgres', 'create database secondary_dump;');
+	}
+
 	if ($pgdump_runs{$run}->{restore_cmd})
 	{
 		$node->command_ok(\@{ $pgdump_runs{$run}->{restore_cmd} },
 			"$run: pg_restore runs");
 	}
 
+	if ($pgdump_runs{$run}->{secondary_dump_cmd})
+	{
+		$node->command_ok(\@{ $pgdump_runs{$run}->{secondary_dump_cmd} },
+			"$run: secondary pg_dump runs");
+	}
+
 	if ($pgdump_runs{$run}->{test_key})
 	{
 		$test_key = $pgdump_runs{$run}->{test_key};
@@ -3561,6 +3654,14 @@ foreach my $run (sort keys %pgdump_runs)
 			}
 		}
 	}
+
+	if ($pgdump_runs{$run}->{secondary_dump_cmd})
+	{
+		$node->safe_psql('secondary_dump',
+			'alter subscription sub1 set (slot_name = NONE);');
+		$node->safe_psql('secondary_dump', 'drop subscription sub1;');
+		$node->safe_psql('postgres',       'drop database secondary_dump;');
+	}
 }
 
 #########################################
diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm
index d579d5c177..fb90d1abcc 100644
--- a/src/test/perl/TestLib.pm
+++ b/src/test/perl/TestLib.pm
@@ -338,8 +338,16 @@ The return value from the command is passed through.
 
 sub run_log
 {
-	print("# Running: " . join(" ", @{ $_[0] }) . "\n");
-	return IPC::Run::run(@_);
+	my $flatten = sub {
+		return map { ref eq 'ARRAY' ? @$_ : $_ } @_;
+	};
+	my @x = @{$_[0]};
+	print("# Running: " . join(" ", $flatten->(@x)) . "\n");
+	if (ref($x[0]) ne '') {
+		return IPC::Run::run(@x);
+	} else {
+		return IPC::Run::run(@_);
+	}
 }
 
 =pod
-- 
2.27.0

>From fa48bd25d5f4a3c4a36a0f6ef09898f816d79286 Mon Sep 17 00:00:00 2001
From: David Gilman <davidgilm...@gmail.com>
Date: Fri, 22 May 2020 17:27:51 -0400
Subject: [PATCH 3/5] Remove unused seek check in tar dump format

---
 src/bin/pg_dump/pg_backup_tar.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c
index b4f5942959..bc88c311c7 100644
--- a/src/bin/pg_dump/pg_backup_tar.c
+++ b/src/bin/pg_dump/pg_backup_tar.c
@@ -82,7 +82,6 @@ typedef struct
 
 typedef struct
 {
-	int			hasSeek;
 	pgoff_t		filePos;
 	TAR_MEMBER *blobToc;
 	FILE	   *tarFH;
@@ -192,8 +191,6 @@ InitArchiveFmt_Tar(ArchiveHandle *AH)
 		 */
 		/* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */
 
-		ctx->hasSeek = checkSeek(ctx->tarFH);
-
 		/*
 		 * We don't support compression because reading the files back is not
 		 * possible since gzdopen uses buffered IO which totally screws file
@@ -226,8 +223,6 @@ InitArchiveFmt_Tar(ArchiveHandle *AH)
 
 		ctx->tarFHpos = 0;
 
-		ctx->hasSeek = checkSeek(ctx->tarFH);
-
 		/*
 		 * Forcibly unmark the header as read since we use the lookahead
 		 * buffer
-- 
2.27.0

>From 881d5db123af6fca008eb479ac89624be1b75c50 Mon Sep 17 00:00:00 2001
From: David Gilman <davidgilm...@gmail.com>
Date: Mon, 25 May 2020 17:34:52 -0400
Subject: [PATCH 4/5] Skip tables in pg_restore by seeking instead of reading

---
 src/bin/pg_dump/pg_backup_custom.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c
index a873ac3afa..16414113af 100644
--- a/src/bin/pg_dump/pg_backup_custom.c
+++ b/src/bin/pg_dump/pg_backup_custom.c
@@ -575,19 +575,27 @@ _skipData(ArchiveHandle *AH)
 	blkLen = ReadInt(AH);
 	while (blkLen != 0)
 	{
-		if (blkLen > buflen)
+		if (ctx->hasSeek)
 		{
-			if (buf)
-				free(buf);
-			buf = (char *) pg_malloc(blkLen);
-			buflen = blkLen;
+			if (fseeko(AH->FH, blkLen, SEEK_CUR) != 0)
+				fatal("error during file seek: %m");
 		}
-		if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen)
+		else
 		{
-			if (feof(AH->FH))
-				fatal("could not read from input file: end of file");
-			else
-				fatal("could not read from input file: %m");
+			if (blkLen > buflen)
+			{
+				if (buf)
+					free(buf);
+				buf = (char *) pg_malloc(blkLen);
+				buflen = blkLen;
+			}
+			if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen)
+			{
+				if (feof(AH->FH))
+					fatal("could not read from input file: end of file");
+				else
+					fatal("could not read from input file: %m");
+			}
 		}
 
 		ctx->filePos += blkLen;
-- 
2.27.0

Reply via email to