On Thu, 2025-03-27 at 17:07 +0530, Ashutosh Bapat wrote: > Pulled the latest sources but the test is still failing with the same > differences.
The attached set of patches (your 0001 and 0002, combined with my patch v2j-0003) applied on master (058b5152f0) is passing the pg_upgrade test suite for me. Are you saying that the tests don't work for you even when v2j-0003 is applied? Or are you saying that your tests are failing on master, and that v2j-0002 should be committed? Regards, Jeff Davis
From 6fc3b98dc9a2589b9943e075b492b4c31044c14e Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat <ashutosh.ba...@enterprisedb.com> Date: Thu, 27 Jun 2024 10:03:53 +0530 Subject: [PATCH v2j 1/3] Test pg_dump/restore of regression objects 002_pg_upgrade.pl tests pg_upgrade of the regression database left behind by the regression run. Modify it to test dump and restore of the regression database as well. The regression database created by the regression run contains almost all the database objects supported by PostgreSQL in various states. Hence the new testcase covers dump and restore scenarios not covered by individual dump/restore cases. Until now 002_pg_upgrade only tested dump/restore through pg_upgrade, which uses only the binary mode. Many regression tests mention that they leave objects behind for dump/restore testing, but they are not tested in a non-binary mode. The new testcase closes that gap. Testing dump and restore of the regression database makes this test run longer for a relatively small benefit. Hence run it only when explicitly requested by the user by specifying "regress_dump_test" in PG_TEST_EXTRA. Note for the reviewers: The new test has uncovered many bugs in one year so far. 1. Introduced by 14e87ffa5c54. Fixed in fd41ba93e4630921a72ed5127cd0d552a8f3f8fc. 2. Introduced by 0413a556990ba628a3de8a0b58be020fd9a14ed0. Reverted in 74563f6b90216180fc13649725179fc119dddeb5. 3. Fixed by d611f8b1587b8f30caa7c0da99ae5d28e914d54f. 4. Being discussed on hackers at https://www.postgresql.org/message-id/caexhw5s47kmubpbbrjzsm-zfe0tj2o3gbagb7yaye8rq-v2...@mail.gmail.com Author: Ashutosh Bapat Reviewed-by: Michael Paquier, Daniel Gustafsson, Tom Lane, Alvaro Herrera Discussion: https://www.postgresql.org/message-id/CAExHW5uF5V=Cjecx3_Z=7xfh4rg2wf61pt+hfquzjbqourz...@mail.gmail.com --- doc/src/sgml/regress.sgml | 12 ++ src/bin/pg_upgrade/t/002_pg_upgrade.pl | 144 ++++++++++++++++- src/test/perl/Makefile | 2 + src/test/perl/PostgreSQL/Test/AdjustDump.pm | 167 ++++++++++++++++++++ src/test/perl/meson.build | 1 + 5 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 src/test/perl/PostgreSQL/Test/AdjustDump.pm diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 0e5e8e8f309..237b974b3ab 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -357,6 +357,18 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance libpq_encryption' </para> </listitem> </varlistentry> + + <varlistentry> + <term><literal>regress_dump_test</literal></term> + <listitem> + <para> + When enabled, <filename>src/bin/pg_upgrade/t/002_pg_upgrade.pl</filename> + tests dump and restore of the regression database left behind by the + regression run. Not enabled by default because it is time- and + resource-consuming. + </para> + </listitem> + </varlistentry> </variablelist> Tests for features that are not supported by the current build diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 00051b85035..d08eea6693f 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -12,6 +12,7 @@ use File::Path qw(rmtree); use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use PostgreSQL::Test::AdjustUpgrade; +use PostgreSQL::Test::AdjustDump; use Test::More; # Can be changed to test the other modes. @@ -35,8 +36,8 @@ sub generate_db "created database with ASCII characters from $from_char to $to_char"); } -# Filter the contents of a dump before its use in a content comparison.
-# This returns the path to the filtered dump. +# Filter the contents of a dump before its use in a content comparison for +# upgrade testing. This returns the path to the filtered dump. sub filter_dump { my ($is_old, $old_version, $dump_file) = @_; @@ -262,6 +263,21 @@ else } } is($rc, 0, 'regression tests pass'); + + # Test dump/restore of the objects left behind by regression. Ideally it + # should be done in a separate TAP test, but doing it here saves us one full + # regression run. + # + # This step takes several extra seconds and some extra disk space, so + # requires an opt-in with the PG_TEST_EXTRA environment variable. + # + # Do this while the old cluster is running, before it is shut down by the + # upgrade test. + if ( $ENV{PG_TEST_EXTRA} + && $ENV{PG_TEST_EXTRA} =~ /\bregress_dump_test\b/) + { + test_regression_dump_restore($oldnode, %node_params); + } } # Initialize a new node for the upgrade. @@ -539,4 +555,128 @@ my $dump2_filtered = filter_dump(0, $oldnode->pg_version, $dump2_file); compare_files($dump1_filtered, $dump2_filtered, 'old and new dumps match after pg_upgrade'); +# Test dump and restore of objects left behind by the regression run. +# +# It is expected that the regression tests, which create the `regression` +# database, are run on `src_node`, which in turn is left in a running state. A +# fresh node is created using the given `node_params`, which are expected to be +# the same ones used to create `src_node`, so as to avoid any differences in the +# databases. +# +# Plain dumps from both nodes are compared to make sure that all the dumped +# objects are restored faithfully. +sub test_regression_dump_restore +{ + my ($src_node, %node_params) = @_; + my $dst_node = PostgreSQL::Test::Cluster->new('dst_node'); + + # Make sure that the source and destination nodes have the same version and + # do not use custom install paths. In both cases, the dump files may require + # additional adjustments unknown to the code here. Do not run this test in + # such a case, to avoid spending time and resources unnecessarily. + if ($src_node->pg_version != $dst_node->pg_version + or defined $src_node->{_install_path}) + { + fail("same version dump and restore test using default installation"); + return; + } + + # Dump the original database for comparison later. + my $src_dump = + get_dump_for_comparison($src_node, 'regression', 'src_dump', 1); + + # Set up the destination database cluster + $dst_node->init(%node_params); + # Stabilize stats for comparison. + $dst_node->append_conf('postgresql.conf', 'autovacuum = off'); + $dst_node->start; + + # Test all formats one by one. + for my $format ('plain', 'tar', 'directory', 'custom') + { + my $dump_file = "$tempdir/regression_dump.$format"; + my $restored_db = 'regression_' . $format; + + # Use --create in dump and restore commands so that the restored + # database has the same configurable variable settings as the original + # database and the plain dumps taken for comparison do not differ + # because of locale changes. Additionally, this provides test coverage + # for the --create option. + $src_node->command_ok( + [ + 'pg_dump', "-F$format", '--no-sync', + '-d', $src_node->connstr('regression'), + '--create', '-f', $dump_file + ], + "pg_dump on source instance in $format format"); + + my @restore_command; + if ($format eq 'plain') + { + # Restore dump in "plain" format using `psql`.
+ @restore_command = [ 'psql', '-d', 'postgres', '-f', $dump_file ]; + } + else + { + @restore_command = [ + 'pg_restore', '--create', + '-d', 'postgres', $dump_file + ]; + } + $dst_node->command_ok(@restore_command, + "restored dump taken in $format format on destination instance"); + + my $dst_dump = + get_dump_for_comparison($dst_node, 'regression', + 'dest_dump.' . $format, 0); + + compare_files($src_dump, $dst_dump, + "dump outputs from original and restored regression database (using $format format) match" + ); + + # Rename the restored database so that it is available for debugging in + # case the test fails. + $dst_node->safe_psql('postgres', "ALTER DATABASE regression RENAME TO $restored_db"); + } +} + +# Dump database `db` from the given `node` in plain format and adjust it for +# comparing dumps from the original and the restored database. +# +# `file_prefix` is used to create unique names for all dump files so that they +# remain available for debugging in case the test fails. +# +# `adjust_child_columns` is passed to adjust_regress_dumpfile(), which actually +# adjusts the dump output. +# +# The name of the file containing the adjusted dump is returned. +sub get_dump_for_comparison +{ + my ($node, $db, $file_prefix, $adjust_child_columns) = @_; + + my $dumpfile = $tempdir . '/' . $file_prefix . '.sql'; + my $dump_adjusted = "${dumpfile}_adjusted"; + + # Usually we avoid comparing statistics in our tests since they are flaky by + # nature. However, if statistics are dumped and restored, they are expected + # to be restored as is, i.e. the statistics from the original database and + # those from the restored database should match. We turn off autovacuum on + # the source and the target database to avoid any statistics update during + # the restore operation. Hence we do not exclude statistics from the dump.
+ $node->command_ok( + [ + 'pg_dump', '--no-sync', '-d', $node->connstr($db), '-f', + $dumpfile + ], + 'dump for comparison succeeded'); + + open(my $dh, '>', $dump_adjusted) + || die + "could not open $dump_adjusted for writing the adjusted dump: $!"; + print $dh adjust_regress_dumpfile(slurp_file($dumpfile), + $adjust_child_columns); + close($dh); + + return $dump_adjusted; +} + done_testing(); diff --git a/src/test/perl/Makefile b/src/test/perl/Makefile index d82fb67540e..def89650ead 100644 --- a/src/test/perl/Makefile +++ b/src/test/perl/Makefile @@ -26,6 +26,7 @@ install: all installdirs $(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/Cluster.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm' $(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/BackgroundPsql.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/BackgroundPsql.pm' $(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/AdjustUpgrade.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustUpgrade.pm' + $(INSTALL_DATA) $(srcdir)/PostgreSQL/Test/AdjustDump.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustDump.pm' $(INSTALL_DATA) $(srcdir)/PostgreSQL/Version.pm '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Version.pm' uninstall: @@ -36,6 +37,7 @@ uninstall: rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/Cluster.pm' rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/BackgroundPsql.pm' rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustUpgrade.pm' + rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Test/AdjustDump.pm' rm -f '$(DESTDIR)$(pgxsdir)/$(subdir)/PostgreSQL/Version.pm' endif diff --git a/src/test/perl/PostgreSQL/Test/AdjustDump.pm b/src/test/perl/PostgreSQL/Test/AdjustDump.pm new file mode 100644 index 00000000000..74b9a60cf34 --- /dev/null +++ b/src/test/perl/PostgreSQL/Test/AdjustDump.pm @@ -0,0 +1,167 @@ + +# Copyright (c) 2024-2025, PostgreSQL Global Development Group + +=pod + +=head1 NAME + +PostgreSQL::Test::AdjustDump - helper module for dump and restore tests + +=head1 SYNOPSIS + + use PostgreSQL::Test::AdjustDump; + + # Adjust contents of dump output file so that dump output from original + # regression database and that from the restored regression database match + $dump = adjust_regress_dumpfile($dump, $adjust_child_columns); + +=head1 DESCRIPTION + +C<PostgreSQL::Test::AdjustDump> encapsulates various hacks needed to +compare the results of dump and restore tests. + +=cut + +package PostgreSQL::Test::AdjustDump; + +use strict; +use warnings FATAL => 'all'; + +use Exporter 'import'; +use Test::More; + +our @EXPORT = qw( + adjust_regress_dumpfile +); + +=pod + +=head1 ROUTINES + +=over + +=item $dump = adjust_regress_dumpfile($dump, $adjust_child_columns) + +If we take a dump of the regression database left behind after running the +regression tests, restore the dump, and take a dump of the restored regression +database, the outputs of the two dumps differ in the following cases. This +routine adjusts the given dump so that dump outputs from the original and +restored database, respectively, match. + +Case 1: Some regression tests purposefully create child tables in such a way +that the order of their inherited columns differs from the column orders of +their respective parents. In the restored database, however, the order of their +inherited columns is the same as that of their respective parents. Thus the +column orders of these child tables in the original database and those in the +restored database differ, causing differences in the dump outputs. See +MergeAttributes() and dumpTableSchema() for details.
This routine rearranges the column +declarations in the relevant C<CREATE TABLE... INHERITS> statements in the dump +file from the original database to match those from the restored database. We +could, instead, adjust the statements in the dump from the restored database to +match those from the original database, or adjust both to a canonical order. +But we have chosen to adjust the statements in the dump from the original +database for no particular reason. + +Case 2: When dumping COPY statements, the columns are ordered by their attribute +number by fmtCopyColumnList(). If a column is added to a parent table after a +child has inherited the parent and the child has its own columns, the attribute +number of the column changes after restoring the child table. This is because, +when executing the dumped C<CREATE TABLE... INHERITS> statement, all the parent +attributes are created before any child attributes. Thus the order of columns in +COPY statements dumped from the original and the restored databases, +respectively, differs. Such tables in the regression tests are listed below. It +is hard to adjust the column order in the COPY statement along with the data. +Hence we just remove such COPY statements from the dump output. + +Additionally, the routine normalizes blank lines and newlines to avoid noise. + +Note: Usually we avoid comparing statistics in our tests since they are flaky by +nature. However, if statistics are dumped and restored, they are expected to be +restored as is, i.e. the statistics from the original database and those from +the restored database should match. Hence we do not filter statistics from the +dump, if they are dumped. + +Arguments: + +=over + +=item C<dump>: Contents of dump file + +=item C<adjust_child_columns>: 1 indicates that the given dump file requires +adjusting columns in the child tables; usually when the dump is from the +original database. 0 indicates no such adjustment is needed; usually when the +dump is from the restored database. + +=back + +Returns the adjusted dump text. + +=cut + +sub adjust_regress_dumpfile +{ + my ($dump, $adjust_child_columns) = @_; + + # use Unix newlines + $dump =~ s/\r\n/\n/g; + + # Adjust the CREATE TABLE ... INHERITS statements.
+ if ($adjust_child_columns) + { + my $saved_dump = $dump; + + $dump =~ s/(^CREATE\sTABLE\sgenerated_stored_tests\.gtestxx_4\s\() + (\n\s+b\sinteger), + (\n\s+a\sinteger\sNOT\sNULL)/$1$3,$2/mgx; + ok($saved_dump ne $dump, + 'applied generated_stored_tests.gtestxx_4 adjustments'); + + $saved_dump = $dump; + $dump =~ s/(^CREATE\sTABLE\sgenerated_virtual_tests\.gtestxx_4\s\() + (\n\s+b\sinteger), + (\n\s+a\sinteger\sNOT\sNULL)/$1$3,$2/mgx; + ok($saved_dump ne $dump, + 'applied generated_virtual_tests.gtestxx_4 adjustments'); + + $saved_dump = $dump; + $dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c1\s\() + (\n\s+int_four\sbigint), + (\n\s+int_eight\sbigint), + (\n\s+int_two\ssmallint)/$1$4,$2,$3/mgx; + ok($saved_dump ne $dump, + 'applied public.test_type_diff2_c1 adjustments'); + + $saved_dump = $dump; + $dump =~ s/(^CREATE\sTABLE\spublic\.test_type_diff2_c2\s\() + (\n\s+int_eight\sbigint), + (\n\s+int_two\ssmallint), + (\n\s+int_four\sbigint)/$1$3,$4,$2/mgx; + ok($saved_dump ne $dump, + 'applied public.test_type_diff2_c2 adjustments'); + } + + # Remove COPY statements with differing column order + for my $table ( + 'public\.b_star', 'public\.c_star', + 'public\.cc2', 'public\.d_star', + 'public\.e_star', 'public\.f_star', + 'public\.renamecolumnanother', 'public\.renamecolumnchild', + 'public\.test_type_diff2_c1', 'public\.test_type_diff2_c2', + 'public\.test_type_diff_c') + { + $dump =~ s/^COPY\s$table\s\(.+?^\\\.$//sm; + } + + # Suppress blank lines, as some places in pg_dump emit more or fewer. + $dump =~ s/\n\n+/\n/g; + + return $dump; +} + +=pod + +=back + +=cut + +1; diff --git a/src/test/perl/meson.build b/src/test/perl/meson.build index 58e30f15f9d..492ca571ff8 100644 --- a/src/test/perl/meson.build +++ b/src/test/perl/meson.build @@ -14,4 +14,5 @@ install_data( 'PostgreSQL/Test/Cluster.pm', 'PostgreSQL/Test/BackgroundPsql.pm', 'PostgreSQL/Test/AdjustUpgrade.pm', + 'PostgreSQL/Test/AdjustDump.pm', install_dir: dir_pgxs / 'src/test/perl/PostgreSQL/Test') -- 2.34.1
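To make the adjustment technique in AdjustDump.pm above easier to follow, here is a minimal standalone sketch of the same /mgx substitution trick, using the gtestxx_4 child table as in the module. The sample dump text below is illustrative, not taken from a real dump:

use strict;
use warnings;

my $dump = <<'EOF';
CREATE TABLE generated_stored_tests.gtestxx_4 (
    b integer,
    a integer NOT NULL
)
INHERITS (generated_stored_tests.gtest1);
EOF

# /m lets ^ match at each line start, /x makes the whitespace in the
# pattern insignificant so it can be laid out readably, and /g applies
# the substitution to every occurrence.  Each column declaration is
# captured separately and re-emitted in the parent's column order.
$dump =~ s/(^CREATE\sTABLE\sgenerated_stored_tests\.gtestxx_4\s\()
           (\n\s+b\sinteger),
           (\n\s+a\sinteger\sNOT\sNULL)/$1$3,$2/mgx;

print $dump;
# Prints the same statement with "a integer NOT NULL" now listed
# before "b integer", matching the restored database's column order.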
From c0bb2eab8cba8910d7c50df3c29c146679a1ae6c Mon Sep 17 00:00:00 2001 From: Ashutosh Bapat <ashutosh.bapat....@gmail.com> Date: Mon, 24 Mar 2025 11:21:12 +0530 Subject: [PATCH v2j 2/3] Use only one format and make the test run by default According to Alvaro (and I agree with him), the test should be run by default. Otherwise we get to know about a bug only after a buildfarm animal where it's enabled reports a failure. Further, testing only one format may suffice, since all the formats have shown the same bugs so far. If we use the directory format, we can use -j, which reduces the time taken by the dump/restore test by about 12%. This patch removes the PG_TEST_EXTRA option and runs the test only in the directory format with parallelism enabled. Note for committer: If we decide to accept this change, it should be merged with the previous commit. --- doc/src/sgml/regress.sgml | 12 ---- src/bin/pg_upgrade/t/002_pg_upgrade.pl | 76 +++++++++----------------- 2 files changed, 25 insertions(+), 63 deletions(-) diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml index 237b974b3ab..0e5e8e8f309 100644 --- a/doc/src/sgml/regress.sgml +++ b/doc/src/sgml/regress.sgml @@ -357,18 +357,6 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance libpq_encryption' </para> </listitem> </varlistentry> - - <varlistentry> - <term><literal>regress_dump_test</literal></term> - <listitem> - <para> - When enabled, <filename>src/bin/pg_upgrade/t/002_pg_upgrade.pl</filename> - tests dump and restore of the regression database left behind by the - regression run. Not enabled by default because it is time- and - resource-consuming. - </para> - </listitem> - </varlistentry> </variablelist> Tests for features that are not supported by the current build diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index d08eea6693f..cbd9831bf9e 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -268,16 +268,9 @@ else # should be done in a separate TAP test, but doing it here saves us one full # regression run. # - # This step takes several extra seconds and some extra disk space, so - # requires an opt-in with the PG_TEST_EXTRA environment variable. - # # Do this while the old cluster is running, before it is shut down by the # upgrade test. - if ( $ENV{PG_TEST_EXTRA} - && $ENV{PG_TEST_EXTRA} =~ /\bregress_dump_test\b/) - { - test_regression_dump_restore($oldnode, %node_params); - } + test_regression_dump_restore($oldnode, %node_params); } # Initialize a new node for the upgrade. @@ -590,53 +583,34 @@ sub test_regression_dump_restore $dst_node->append_conf('postgresql.conf', 'autovacuum = off'); $dst_node->start; - # Test all formats one by one. - for my $format ('plain', 'tar', 'directory', 'custom') - { - my $dump_file = "$tempdir/regression_dump.$format"; - my $restored_db = 'regression_' . $format; - - # Use --create in dump and restore commands so that the restored - # database has the same configurable variable settings as the original - # database and the plain dumps taken for comparison do not differ - # because of locale changes. Additionally, this provides test coverage - # for the --create option.
- $src_node->command_ok( - [ - 'pg_dump', "-F$format", '--no-sync', - '-d', $src_node->connstr('regression'), - '--create', '-f', $dump_file - ], - "pg_dump on source instance in $format format"); + my $dump_file = "$tempdir/regression.dump"; - my @restore_command; - if ($format eq 'plain') - { - # Restore dump in "plain" format using `psql`. - @restore_command = [ 'psql', '-d', 'postgres', '-f', $dump_file ]; - } - else - { - @restore_command = [ - 'pg_restore', '--create', - '-d', 'postgres', $dump_file - ]; - } - $dst_node->command_ok(@restore_command, - "restored dump taken in $format format on destination instance"); + # Use --create in dump and restore commands so that the restored database + # has the same configurable variable settings as the original database, so + # that the plain dumps taken from both databases for comparison do not + # differ because of locale changes. Additionally, this provides test + # coverage for the --create option. + # + # We use the directory format, which allows dumping and restoring in + # parallel, to reduce the test's run time. + $src_node->command_ok( + [ + 'pg_dump', '-Fd', '-j2', '--no-sync', + '-d', $src_node->connstr('regression'), + '--create', '-f', $dump_file + ], + "pg_dump on source instance succeeded"); - my $dst_dump = - get_dump_for_comparison($dst_node, 'regression', - 'dest_dump.' . $format, 0); + $dst_node->command_ok( + [ 'pg_restore', '--create', '-j2', '-d', 'postgres', $dump_file ], + "restored dump to destination instance"); - compare_files($src_dump, $dst_dump, - "dump outputs from original and restored regression database (using $format format) match" - ); + my $dst_dump = get_dump_for_comparison($dst_node, 'regression', + 'dest_dump', 0); - # Rename the restored database so that it is available for debugging in - # case the test fails. - $dst_node->safe_psql('postgres', "ALTER DATABASE regression RENAME TO $restored_db"); - } + compare_files($src_dump, $dst_dump, + "dump outputs from original and restored regression database match" + ); } # Dump database `db` from the given `node` in plain format and adjust it for -- 2.34.1
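To illustrate what 0002 switches to, here is a rough, self-contained sketch of a directory-format dump/restore round trip with parallel workers, reduced to a single node rather than the two nodes the real test uses. Node, database, and table names here are invented for illustration; --create is passed to pg_dump to mirror the patch, though it mainly matters for the pg_restore step:

use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

my $node = PostgreSQL::Test::Cluster->new('roundtrip');
$node->init;
$node->start;

$node->safe_psql('postgres', 'CREATE DATABASE regression');
$node->safe_psql('regression',
	'CREATE TABLE t AS SELECT generate_series(1, 1000) AS i');

# Directory format (-Fd) is the only pg_dump output format that
# accepts -j for a parallel dump.
my $dump_dir = PostgreSQL::Test::Utils::tempdir . '/regression.dump';
$node->command_ok(
	[
		'pg_dump', '-Fd', '-j2', '--no-sync', '--create',
		'-d', $node->connstr('regression'), '-f', $dump_dir
	],
	'parallel directory-format dump');

# Drop the database, then let pg_restore recreate it: with --create,
# pg_restore issues the CREATE DATABASE itself before reconnecting.
$node->safe_psql('postgres', 'DROP DATABASE regression');
$node->command_ok(
	[ 'pg_restore', '--create', '-j2', '-d', 'postgres', $dump_dir ],
	'parallel restore with --create');

is($node->safe_psql('regression', 'SELECT count(*) FROM t'),
	'1000', 'table contents survived the round trip');

done_testing();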
From 0cf33ba92d80d53b78738ea80c1d660032ec5e24 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Tue, 11 Mar 2025 13:51:25 -0700 Subject: [PATCH v2j 3/3] Make MV statistics depend on MV data. REFRESH MATERIALIZED VIEW replaces the storage, which resets statistics, so statistics must be restored afterward. If statistics are for a materialized view, and if the materialized view data is being dumped, create a dependency from the former on the latter. Defer the statistics to SECTION_POST_DATA, which is where the materialized view data goes. That, in turn, requires deferring the statistics to RESTORE_PASS_POST_ACL. Reported-by: Ashutosh Bapat <ashutosh.bapat....@gmail.com> Discussion: https://postgr.es/m/caexhw5s47kmubpbbrjzsm-zfe0tj2o3gbagb7yaye8rq-v2...@mail.gmail.com --- src/bin/pg_dump/pg_backup_archiver.c | 46 ++++++++---- src/bin/pg_dump/pg_dump.c | 104 +++++++++++++++------------ src/bin/pg_dump/pg_dump.h | 3 +- src/bin/pg_dump/pg_dump_sort.c | 2 +- 4 files changed, 95 insertions(+), 60 deletions(-) diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 82d51c89ac6..1d131e5a57d 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -72,7 +72,7 @@ static void processEncodingEntry(ArchiveHandle *AH, TocEntry *te); static void processStdStringsEntry(ArchiveHandle *AH, TocEntry *te); static void processSearchPathEntry(ArchiveHandle *AH, TocEntry *te); static int _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH); -static RestorePass _tocEntryRestorePass(TocEntry *te); +static RestorePass _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te); static bool _tocEntryIsACL(TocEntry *te); static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te); @@ -102,7 +102,8 @@ static void pending_list_append(TocEntry *l, TocEntry *te); static void pending_list_remove(TocEntry *te); static int TocEntrySizeCompareQsort(const void *p1, const void *p2); static int TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg); -static void move_to_ready_heap(TocEntry *pending_list, +static void move_to_ready_heap(ArchiveHandle *AH, + TocEntry *pending_list, binaryheap *ready_heap, RestorePass pass); static TocEntry *pop_next_work_item(binaryheap *ready_heap, @@ -748,7 +749,7 @@ RestoreArchive(Archive *AHX) if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) == 0) continue; /* ignore if not to be dumped at all */ - switch (_tocEntryRestorePass(te)) + switch (_tocEntryRestorePass(AH, te)) { case RESTORE_PASS_MAIN: (void) restore_toc_entry(AH, te, false); @@ -767,7 +768,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && - _tocEntryRestorePass(te) == RESTORE_PASS_ACL) + _tocEntryRestorePass(AH, te) == RESTORE_PASS_ACL) (void) restore_toc_entry(AH, te, false); } } @@ -777,7 +778,7 @@ RestoreArchive(Archive *AHX) for (te = AH->toc->next; te != AH->toc; te = te->next) { if ((te->reqs & (REQ_SCHEMA | REQ_DATA | REQ_STATS)) != 0 && - _tocEntryRestorePass(te) == RESTORE_PASS_POST_ACL) + _tocEntryRestorePass(AH, te) == RESTORE_PASS_POST_ACL) (void) restore_toc_entry(AH, te, false); } } @@ -3219,7 +3220,7 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH) * See notes with the RestorePass typedef in pg_backup_archiver.h.
*/ static RestorePass -_tocEntryRestorePass(TocEntry *te) +_tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te) { /* "ACL LANGUAGE" was a crock emitted only in PG 7.4 */ if (strcmp(te->desc, "ACL") == 0 || @@ -3240,6 +3241,26 @@ _tocEntryRestorePass(TocEntry *te) strncmp(te->tag, "EVENT TRIGGER ", 14) == 0) return RESTORE_PASS_POST_ACL; + /* + * If statistics data is dependent on materialized view data, it must be + * deferred to RESTORE_PASS_POST_ACL. + */ + if (strcmp(te->desc, "STATISTICS DATA") == 0) + { + for (int i = 0; i < te->nDeps; i++) + { + DumpId depid = te->dependencies[i]; + + if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL) + { + TocEntry *otherte = AH->tocsByDumpId[depid]; + + if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0) + return RESTORE_PASS_POST_ACL; + } + } + } + /* All else can be handled in the main pass. */ return RESTORE_PASS_MAIN; } @@ -4249,7 +4270,7 @@ restore_toc_entries_prefork(ArchiveHandle *AH, TocEntry *pending_list) * not set skipped_some in this case, since by assumption no main-pass * items could depend on these. */ - if (_tocEntryRestorePass(next_work_item) != RESTORE_PASS_MAIN) + if (_tocEntryRestorePass(AH, next_work_item) != RESTORE_PASS_MAIN) do_now = false; if (do_now) @@ -4331,7 +4352,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, * process in the current restore pass. */ AH->restorePass = RESTORE_PASS_MAIN; - move_to_ready_heap(pending_list, ready_heap, AH->restorePass); + move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass); /* * main parent loop @@ -4380,7 +4401,7 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, /* Advance to next restore pass */ AH->restorePass++; /* That probably allows some stuff to be made ready */ - move_to_ready_heap(pending_list, ready_heap, AH->restorePass); + move_to_ready_heap(AH, pending_list, ready_heap, AH->restorePass); /* Loop around to see if anything's now ready */ continue; } @@ -4551,7 +4572,8 @@ TocEntrySizeCompareBinaryheap(void *p1, void *p2, void *arg) * which applies the same logic one-at-a-time.) */ static void -move_to_ready_heap(TocEntry *pending_list, +move_to_ready_heap(ArchiveHandle *AH, + TocEntry *pending_list, binaryheap *ready_heap, RestorePass pass) { @@ -4564,7 +4586,7 @@ move_to_ready_heap(TocEntry *pending_list, next_te = te->pending_next; if (te->depCount == 0 && - _tocEntryRestorePass(te) == pass) + _tocEntryRestorePass(AH, te) == pass) { /* Remove it from pending_list ... */ pending_list_remove(te); @@ -4958,7 +4980,7 @@ reduce_dependencies(ArchiveHandle *AH, TocEntry *te, * memberships changed. */ if (otherte->depCount == 0 && - _tocEntryRestorePass(otherte) == AH->restorePass && + _tocEntryRestorePass(AH, otherte) == AH->restorePass && otherte->pending_prev != NULL && ready_heap != NULL) { diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index e41e645f649..78bf376c3c3 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -3002,6 +3002,21 @@ makeTableDataInfo(DumpOptions *dopt, TableInfo *tbinfo) tbinfo->dataObj = tdinfo; + /* + * If statistics and data are being dumped for a materialized view, the + * statistics depend on the materialized view data. That's because REFRESH + * MATERIALIZED VIEW will completely replace the storage and reset the + * stats. + * + * The dependency is added here because the statistics objects are created + * first. 
+ */ + if (tbinfo->relkind == RELKIND_MATVIEW && tbinfo->stats != NULL) + { + tbinfo->stats->section = SECTION_POST_DATA; + addObjectDependency(&tbinfo->stats->dobj, tdinfo->dobj.dumpId); + } + /* Make sure that we'll collect per-column info for this table. */ tbinfo->interesting = true; } @@ -6893,7 +6908,34 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, info->relkind = relkind; info->indAttNames = indAttNames; info->nindAttNames = nindAttNames; - info->postponed_def = false; + + /* + * Ordinarily, stats go in SECTION_DATA for tables and + * SECTION_POST_DATA for indexes. + * + * However, the section may be updated later for materialized views. + * Matview stats depend on the matview data, because REFRESH + * MATERIALIZED VIEW replaces the storage and resets the stats, and + * the matview data is in SECTION_POST_DATA. Also, the materialized + * view definition may be postponed from SECTION_PRE_DATA to + * SECTION_POST_DATA to resolve some kinds of dependency problems (see + * repairMatViewBoundaryMultiLoop()). + */ + switch (info->relkind) + { + case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: + case RELKIND_MATVIEW: + info->section = SECTION_DATA; + break; + case RELKIND_INDEX: + case RELKIND_PARTITIONED_INDEX: + info->section = SECTION_POST_DATA; + break; + default: + pg_fatal("cannot dump statistics for relation kind '%c'", + info->relkind); + } return info; } @@ -7292,9 +7334,17 @@ getTables(Archive *fout, int *numTables) /* Add statistics */ if (tblinfo[i].interesting) - getRelationStatistics(fout, &tblinfo[i].dobj, tblinfo[i].relpages, - PQgetvalue(res, i, i_reltuples), - relallvisible, tblinfo[i].relkind, NULL, 0); + { + RelStatsInfo *stats; + + stats = getRelationStatistics(fout, &tblinfo[i].dobj, + tblinfo[i].relpages, + PQgetvalue(res, i, i_reltuples), + relallvisible, + tblinfo[i].relkind, NULL, 0); + if (tblinfo[i].relkind == RELKIND_MATVIEW) + tblinfo[i].stats = stats; + } /* * Read-lock target tables to make sure they aren't DROPPED or altered @@ -10491,34 +10541,6 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname, appendPQExpBuffer(out, "::%s", argtype); } -/* - * Decide which section to use based on the relkind of the parent object. - * - * NB: materialized views may be postponed from SECTION_PRE_DATA to - * SECTION_POST_DATA to resolve some kinds of dependency problems. If so, the - * matview stats will also be postponed to SECTION_POST_DATA. See - * repairMatViewBoundaryMultiLoop(). 
- */ -static teSection -statisticsDumpSection(const RelStatsInfo *rsinfo) -{ - switch (rsinfo->relkind) - { - case RELKIND_RELATION: - case RELKIND_PARTITIONED_TABLE: - case RELKIND_MATVIEW: - return SECTION_DATA; - case RELKIND_INDEX: - case RELKIND_PARTITIONED_INDEX: - return SECTION_POST_DATA; - default: - pg_fatal("cannot dump statistics for relation kind '%c'", - rsinfo->relkind); - } - - return 0; /* keep compiler quiet */ -} - /* * dumpRelationStats -- * @@ -10531,8 +10553,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) PGresult *res; PQExpBuffer query; PQExpBuffer out; - DumpId *deps = NULL; - int ndeps = 0; int i_attname; int i_inherited; int i_null_frac; @@ -10553,13 +10573,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) if (!fout->dopt->dumpStatistics) return; - /* dependent on the relation definition, if doing schema */ - if (fout->dopt->dumpSchema) - { - deps = dobj->dependencies; - ndeps = dobj->nDeps; - } - query = createPQExpBuffer(); if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS]) { @@ -10737,11 +10750,10 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo) ARCHIVE_OPTS(.tag = dobj->name, .namespace = dobj->namespace->dobj.name, .description = "STATISTICS DATA", - .section = rsinfo->postponed_def ? - SECTION_POST_DATA : statisticsDumpSection(rsinfo), + .section = rsinfo->section, .createStmt = out->data, - .deps = deps, - .nDeps = ndeps)); + .deps = dobj->dependencies, + .nDeps = dobj->nDeps)); destroyPQExpBuffer(out); destroyPQExpBuffer(query); @@ -19429,7 +19441,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs, break; case DO_REL_STATS: /* stats section varies by parent object type, DATA or POST */ - if (statisticsDumpSection((RelStatsInfo *) dobj) == SECTION_DATA) + if (((RelStatsInfo *) dobj)->section == SECTION_DATA) { addObjectDependency(dobj, preDataBound->dumpId); addObjectDependency(postDataBound, dobj->dumpId); diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index bbdb30b5f54..70f7a369e4a 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -369,6 +369,7 @@ typedef struct _tableInfo bool *notnull_islocal; /* true if NOT NULL has local definition */ struct _attrDefInfo **attrdefs; /* DEFAULT expressions */ struct _constraintInfo *checkexprs; /* CHECK constraints */ + struct _relStatsInfo *stats; /* only set for matviews */ bool needs_override; /* has GENERATED ALWAYS AS IDENTITY */ char *amname; /* relation access method */ @@ -449,7 +450,7 @@ typedef struct _relStatsInfo */ char **indAttNames; /* attnames of the index, in order */ int32 nindAttNames; /* number of attnames stored (can be 0) */ - bool postponed_def; /* stats must be postponed into post-data */ + teSection section; /* stats may appear in data or post-data */ } RelStatsInfo; typedef struct _statsExtInfo diff --git a/src/bin/pg_dump/pg_dump_sort.c b/src/bin/pg_dump/pg_dump_sort.c index f75e9928bad..0b0977788f1 100644 --- a/src/bin/pg_dump/pg_dump_sort.c +++ b/src/bin/pg_dump/pg_dump_sort.c @@ -820,7 +820,7 @@ repairMatViewBoundaryMultiLoop(DumpableObject *boundaryobj, RelStatsInfo *nextinfo = (RelStatsInfo *) nextobj; if (nextinfo->relkind == RELKIND_MATVIEW) - nextinfo->postponed_def = true; + nextinfo->section = SECTION_POST_DATA; } } -- 2.34.1
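For context on why 0003 is needed, here is a small sketch demonstrating the underlying behavior the commit message describes: REFRESH MATERIALIZED VIEW swaps in fresh storage, discarding previously established relation statistics, which is why the dumped STATISTICS DATA entry has to sort after MATERIALIZED VIEW DATA. Object names here are invented, and the post-REFRESH assertion is deliberately loose rather than pinned to an exact value:

use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use Test::More;

my $node = PostgreSQL::Test::Cluster->new('mv_stats');
$node->init;
$node->start;

$node->safe_psql(
	'postgres', q{
	CREATE TABLE base AS SELECT generate_series(1, 1000) AS i;
	CREATE MATERIALIZED VIEW mv AS SELECT i FROM base;
	ANALYZE mv;
});

my $before = $node->safe_psql('postgres',
	"SELECT reltuples FROM pg_class WHERE relname = 'mv'");
is($before, '1000', 'ANALYZE populated reltuples for the matview');

# The heap swap performed by REFRESH carries over the transient
# heap's (never-analyzed) relation stats, clobbering the values set
# above -- just as it would clobber stats restored by pg_restore.
$node->safe_psql('postgres', 'REFRESH MATERIALIZED VIEW mv');

my $after = $node->safe_psql('postgres',
	"SELECT reltuples FROM pg_class WHERE relname = 'mv'");
isnt($after, $before, 'REFRESH reset the relation stats');

done_testing();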