On Thu, Jul 02, 2020 at 05:25:21PM -0400, Tom Lane wrote: > I guess I'm completely confused about the purpose of these patches. > Far from coping with the situation of an unseekable file, they appear > to change pg_restore so that it fails altogether if it can't seek > its input file. Why would we want to do this?
I'm not sure where the "fails altogether if it can't seek" is. The "Skip tables in pg_restore" patch retains the old fread() logic. The --disable-seeking stuff was just to support tests, and thanks to help from Justin Pryzby the tests no longer require it. I've attached the updated patch set. Note that this still shouldn't be merged because of Justin's bug report in 20200706050129.gw4...@telsasoft.com which is unrelated to this change but will leave you with flaky CI until it's fixed. -- David Gilman :DG< https://gilslotd.com
>From a671dc328a7defa403ceb46b5107ed4ee5f53103 Mon Sep 17 00:00:00 2001 From: David Gilman <davidgilm...@gmail.com> Date: Wed, 20 May 2020 22:49:28 -0400 Subject: [PATCH 1/5] Scan all TOCs when restoring a custom dump file without offsets TOC requests are not guaranteed to come in disk order. If the custom dump file was written with data offsets, pg_restore can seek directly to the data, making request order irrelevant. If there are no data offsets, pg_restore would never attempt to seek backwards, even when seeking is possible, and would be unable to find TOCs before the current read position in the file. 548e50976 changed how pg_restore's parallel algorithm worked at the cost of greatly increasing out-of-order TOC requests. This patch changes pg_restore to scan through all TOCs to service a TOC read request when restoring a custom dump file without data offsets. The odds of getting a successful parallel restore go way up at the cost of a bunch of extra tiny reads when pg_restore starts up. The pg_restore manpage now warns against running pg_dump with an unseekable output file and suggests that if you plan on doing a parallel restore of a custom dump, you should run pg_dump with --file. --- doc/src/sgml/ref/pg_restore.sgml | 8 ++++++++ src/bin/pg_dump/pg_backup_custom.c | 25 ++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/ref/pg_restore.sgml b/doc/src/sgml/ref/pg_restore.sgml index 232f88024f..23286bb076 100644 --- a/doc/src/sgml/ref/pg_restore.sgml +++ b/doc/src/sgml/ref/pg_restore.sgml @@ -279,6 +279,14 @@ PostgreSQL documentation jobs cannot be used together with the option <option>--single-transaction</option>. </para> + + <para> + <application>pg_restore</application> with concurrent jobs may fail + when restoring a custom archive format dump written to an unseekable + output stream, like stdout. To allow for concurrent restoration of + a custom archive format dump, use <application>pg_dump</application>'s + <option>--file</option> option to specify an output file. + </para> </listitem> </varlistentry> diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c index 6ab122242c..a873ac3afa 100644 --- a/src/bin/pg_dump/pg_backup_custom.c +++ b/src/bin/pg_dump/pg_backup_custom.c @@ -413,6 +413,7 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) lclTocEntry *tctx = (lclTocEntry *) te->formatData; int blkType; int id; + bool initialScan = true; if (tctx->dataState == K_OFFSET_NO_DATA) return; @@ -421,13 +422,28 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) { /* * We cannot seek directly to the desired block. Instead, skip over - * block headers until we find the one we want. This could fail if we - * are asked to restore items out-of-order. + * block headers until we find the one we want. */ - _readBlockHeader(AH, &blkType, &id); - while (blkType != EOF && id != te->dumpId) + for (;;) { + _readBlockHeader(AH, &blkType, &id); + + if (blkType == EOF && ctx->hasSeek && initialScan) + { + /* + * This was possibly an out-of-order request. Try one extra + * pass over the file to find the TOC. + */ + initialScan = false; + if (fseeko(AH->FH, ctx->dataStart, SEEK_SET) != 0) + fatal("error during file seek: %m"); + continue; + } + + if (blkType == EOF || id == te->dumpId) + break; + switch (blkType) { case BLK_DATA: @@ -443,7 +459,6 @@ _PrintTocData(ArchiveHandle *AH, TocEntry *te) blkType); break; } - _readBlockHeader(AH, &blkType, &id); } } else -- 2.27.0
>From 6e8294021482fb76cb17d6c150ec6acb6d0cac37 Mon Sep 17 00:00:00 2001 From: David Gilman <davidgilm...@gmail.com> Date: Sat, 23 May 2020 10:49:33 -0400 Subject: [PATCH 2/5] Add integration test for out-of-order TOC requests in pg_restore --- src/bin/pg_dump/t/002_pg_dump.pl | 107 ++++++++++++++++++++++++++++++- src/test/perl/TestLib.pm | 12 +++- 2 files changed, 114 insertions(+), 5 deletions(-) diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index e116235769..d99e7a5d22 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -31,9 +31,20 @@ my $tempdir_short = TestLib::tempdir_short; # generate a text file to run through the tests from the # non-text file generated by pg_dump. # -# TODO: Have pg_restore actually restore to an independent -# database and then pg_dump *that* database (or something along -# those lines) to validate that part of the process. +# secondary_dump_cmd is an optional key that enables a +# +# pg_dump (dump_cmd) -> +# pg_restore (restore_cmd, real database) -> +# pg_dump (secondary_dump_cmd, SQL output (for matching)) +# +# test process instead of the default +# +# pg_dump (dump_cmd) -> +# pg_restore (restore_cmd, SQL output (for matching)) +# +# test process. The secondary_dump database is created for +# you and your restore_cmd and secondary_dump_cmd must read +# and write from it. my %pgdump_runs = ( binary_upgrade => { @@ -139,6 +150,25 @@ my %pgdump_runs = ( "$tempdir/defaults_custom_format.dump", ], }, + defaults_custom_format_no_seek_parallel_restore => { + test_key => 'defaults', + dump_cmd => [ + ['pg_dump', '-Fc', '-Z6', '--no-sync', 'postgres'], + '|', + ['perl', '-pe', ''], # make pg_dump's stdout unseekable + ">$tempdir/defaults_custom_format_no_seek_parallel_restore" + ], + restore_cmd => [ + 'pg_restore', '-Fc', '--jobs=2', + '--dbname=secondary_dump', + "$tempdir/defaults_custom_format_no_seek_parallel_restore", + ], + secondary_dump_cmd => [ + 'pg_dump', '-Fp', '--no-sync', + "--file=$tempdir/defaults_custom_format_no_seek_parallel_restore.sql", + 'secondary_dump', + ], + }, # Do not use --no-sync to give test coverage for data sync. defaults_dir_format => { @@ -153,6 +183,24 @@ my %pgdump_runs = ( "$tempdir/defaults_dir_format", ], }, + defaults_dir_format_parallel => { + test_key => 'defaults', + dump_cmd => [ + 'pg_dump', '-Fd', + '--jobs=2', '--no-sync', + "--file=$tempdir/defaults_dir_format_parallel", 'postgres', + ], + restore_cmd => [ + 'pg_restore', '-Fd', + '--jobs=2', '--dbname=secondary_dump', + "$tempdir/defaults_dir_format_parallel", + ], + secondary_dump_cmd => [ + 'pg_dump', '-Fp', '--no-sync', + "--file=$tempdir/defaults_dir_format_parallel.sql", + 'secondary_dump', + ], + }, # Do not use --no-sync to give test coverage for data sync. defaults_parallel => { @@ -3298,6 +3346,35 @@ my %tests = ( %full_runs, %dump_test_schema_runs, section_pre_data => 1, }, unlike => { exclude_dump_test_schema => 1 }, + }, + + # This reliably produces the "possibly due to out-of-order restore request" + # when restoring with -j 2 when this was written but future changes to how + # pg_dump works could accidentally correctly order things as we're not really + # telling pg_dump how to do its on-disk layout. + 'custom dump out-of-order' => { + create_sql => ' + CREATE TABLE dump_test.ooo_parent0 (id int primary key); + CREATE TABLE dump_test.ooo_child0 ( + parent0 int references dump_test.ooo_parent0 (id) + ); + CREATE TABLE dump_test.ooo_parent1 (id int primary key); + CREATE TABLE dump_test.ooo_child1 ( + parent0 int references dump_test.ooo_parent1 (id) + );', + regexp => qr/^ + \QCREATE TABLE dump_test.ooo_child0 (\E\n + \s+\Qparent0 integer\E\n + \Q);\E\n/xm, + like => { + %full_runs, %dump_test_schema_runs, + section_pre_data => 1, + defaults_custom_format_no_seek => 1, + defaults_custom_format_no_seek_parallel_restore => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + }, }); ######################################### @@ -3350,6 +3427,11 @@ foreach my $run (sort keys %pgdump_runs) $num_tests++; } + if ($pgdump_runs{$run}->{secondary_dump_cmd}) + { + $num_tests++; + } + if ($pgdump_runs{$run}->{test_key}) { $test_key = $pgdump_runs{$run}->{test_key}; @@ -3499,12 +3581,23 @@ foreach my $run (sort keys %pgdump_runs) $node->command_ok(\@{ $pgdump_runs{$run}->{dump_cmd} }, "$run: pg_dump runs"); + if ($pgdump_runs{$run}->{secondary_dump_cmd}) + { + $node->safe_psql('postgres', 'create database secondary_dump;'); + } + if ($pgdump_runs{$run}->{restore_cmd}) { $node->command_ok(\@{ $pgdump_runs{$run}->{restore_cmd} }, "$run: pg_restore runs"); } + if ($pgdump_runs{$run}->{secondary_dump_cmd}) + { + $node->command_ok(\@{ $pgdump_runs{$run}->{secondary_dump_cmd} }, + "$run: secondary pg_dump runs"); + } + if ($pgdump_runs{$run}->{test_key}) { $test_key = $pgdump_runs{$run}->{test_key}; @@ -3561,6 +3654,14 @@ foreach my $run (sort keys %pgdump_runs) } } } + + if ($pgdump_runs{$run}->{secondary_dump_cmd}) + { + $node->safe_psql('secondary_dump', + 'alter subscription sub1 set (slot_name = NONE);'); + $node->safe_psql('secondary_dump', 'drop subscription sub1;'); + $node->safe_psql('postgres', 'drop database secondary_dump;'); + } } ######################################### diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm index d579d5c177..fb90d1abcc 100644 --- a/src/test/perl/TestLib.pm +++ b/src/test/perl/TestLib.pm @@ -338,8 +338,16 @@ The return value from the command is passed through. sub run_log { - print("# Running: " . join(" ", @{ $_[0] }) . "\n"); - return IPC::Run::run(@_); + my $flatten = sub { + return map { ref eq 'ARRAY' ? @$_ : $_ } @_; + }; + my @x = @{$_[0]}; + print("# Running: " . join(" ", $flatten->(@x)) . "\n"); + if (ref($x[0]) ne '') { + return IPC::Run::run(@x); + } else { + return IPC::Run::run(@_); + } } =pod -- 2.27.0
>From fa48bd25d5f4a3c4a36a0f6ef09898f816d79286 Mon Sep 17 00:00:00 2001 From: David Gilman <davidgilm...@gmail.com> Date: Fri, 22 May 2020 17:27:51 -0400 Subject: [PATCH 3/5] Remove unused seek check in tar dump format --- src/bin/pg_dump/pg_backup_tar.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c index b4f5942959..bc88c311c7 100644 --- a/src/bin/pg_dump/pg_backup_tar.c +++ b/src/bin/pg_dump/pg_backup_tar.c @@ -82,7 +82,6 @@ typedef struct typedef struct { - int hasSeek; pgoff_t filePos; TAR_MEMBER *blobToc; FILE *tarFH; @@ -192,8 +191,6 @@ InitArchiveFmt_Tar(ArchiveHandle *AH) */ /* setvbuf(ctx->tarFH, NULL, _IONBF, 0); */ - ctx->hasSeek = checkSeek(ctx->tarFH); - /* * We don't support compression because reading the files back is not * possible since gzdopen uses buffered IO which totally screws file @@ -226,8 +223,6 @@ InitArchiveFmt_Tar(ArchiveHandle *AH) ctx->tarFHpos = 0; - ctx->hasSeek = checkSeek(ctx->tarFH); - /* * Forcibly unmark the header as read since we use the lookahead * buffer -- 2.27.0
>From 881d5db123af6fca008eb479ac89624be1b75c50 Mon Sep 17 00:00:00 2001 From: David Gilman <davidgilm...@gmail.com> Date: Mon, 25 May 2020 17:34:52 -0400 Subject: [PATCH 4/5] Skip tables in pg_restore by seeking instead of reading --- src/bin/pg_dump/pg_backup_custom.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_custom.c b/src/bin/pg_dump/pg_backup_custom.c index a873ac3afa..16414113af 100644 --- a/src/bin/pg_dump/pg_backup_custom.c +++ b/src/bin/pg_dump/pg_backup_custom.c @@ -575,19 +575,27 @@ _skipData(ArchiveHandle *AH) blkLen = ReadInt(AH); while (blkLen != 0) { - if (blkLen > buflen) + if (ctx->hasSeek) { - if (buf) - free(buf); - buf = (char *) pg_malloc(blkLen); - buflen = blkLen; + if (fseeko(AH->FH, blkLen, SEEK_CUR) != 0) + fatal("error during file seek: %m"); } - if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen) + else { - if (feof(AH->FH)) - fatal("could not read from input file: end of file"); - else - fatal("could not read from input file: %m"); + if (blkLen > buflen) + { + if (buf) + free(buf); + buf = (char *) pg_malloc(blkLen); + buflen = blkLen; + } + if ((cnt = fread(buf, 1, blkLen, AH->FH)) != blkLen) + { + if (feof(AH->FH)) + fatal("could not read from input file: end of file"); + else + fatal("could not read from input file: %m"); + } } ctx->filePos += blkLen; -- 2.27.0