On 2023-01-15 Su 18:37, Tom Lane wrote:
> Andrew Dunstan <and...@dunslane.net> writes:
>> Those replacement lines are very difficult to read. I think use of
>> extended regexes and some multi-part replacements would help. I'll have
>> a go at that tomorrow.
> Yeah, after I wrote that code I remembered about \Q ... \E, which would
> eliminate the need for most of the backslashes and probably make things
> better that way.  I didn't get around to improving it yet though, so
> feel free to have a go.
>
>                       


OK, here's my version. It tests clean against all of crake's dump files
back to 9.2.

To some extent it's a matter of taste, but I hate very long regex lines
- it makes it very hard to see what's actually changing, so I broke up
most of those.

Given that we are looking at newlines in some places I decided that
after all it was important to convert CRLF to LF.


cheers


andrew

--
Andrew Dunstan
EDB: https://www.enterprisedb.com
diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
new file mode 100644
index 0000000000..2134afd64e
--- /dev/null
+++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm
@@ -0,0 +1,550 @@
+
+# Copyright (c) 2023, PostgreSQL Global Development Group
+
+=pod
+
+=head1 NAME
+
+PostgreSQL::Test::AdjustUpgrade - helper module for cross-version upgrade tests
+
+=head1 SYNOPSIS
+
+  use PostgreSQL::Test::AdjustUpgrade;
+
+  # Build commands to adjust contents of old-version database before dumping
+  $statements = adjust_database_contents($old_version, %dbnames);
+
+  # Adjust contents of old pg_dumpall output file to match newer version
+  $dump = adjust_old_dumpfile($old_version, $dump);
+
+  # Adjust contents of new pg_dumpall output file to match older version
+  $dump = adjust_new_dumpfile($old_version, $dump);
+
+=head1 DESCRIPTION
+
+C<PostgreSQL::Test::AdjustUpgrade> encapsulates various hacks needed to
+compare the results of cross-version upgrade tests.
+
+=cut
+
+package PostgreSQL::Test::AdjustUpgrade;
+
+use strict;
+use warnings;
+
+use Exporter 'import';
+use PostgreSQL::Version;
+
+our @EXPORT = qw(
+  adjust_database_contents
+  adjust_old_dumpfile
+  adjust_new_dumpfile
+);
+
+=pod
+
+=head1 ROUTINES
+
+=over
+
+=item $statements = adjust_database_contents($old_version, %dbnames)
+
+Generate SQL commands to perform any changes to an old-version installation
+that are needed before we can pg_upgrade it into the current PostgreSQL
+version.
+
+Typically this involves dropping or adjusting no-longer-supported objects.
+
+Arguments:
+
+=over
+
+=item C<old_version>: Branch we are upgrading from.  This can be a branch
+name such as 'HEAD' or 'REL_11_STABLE', but it can also be any string
+that PostgreSQL::Version accepts.
+
+=item C<dbnames>: Hash of database names present in the old installation.
+
+=back
+
+Returns a reference to a hash, wherein the keys are database names and the
+values are arrayrefs to lists of statements to be run in those databases.
+
+=cut
+
+sub adjust_database_contents
+{
+	my ($old_version, %dbnames) = @_;
+	my $result = {};
+
+	# nothing to do for non-cross-version tests
+	return $result if $old_version eq 'HEAD';
+
+	# convert branch name to numeric form
+	$old_version =~ s/REL_?(\d+(?:_\d+)?)_STABLE/$1/;
+	$old_version =~ s/_/./;
+	$old_version = PostgreSQL::Version->new($old_version);
+
+	# remove dbs of modules known to cause pg_upgrade to fail
+	# anything not builtin and incompatible should clean up its own db
+	foreach my $bad_module ('test_ddl_deparse', 'tsearch2')
+	{
+		if ($dbnames{"contrib_regression_$bad_module"})
+		{
+			_add_st($result, 'postgres',
+				"drop database contrib_regression_$bad_module");
+			delete($dbnames{"contrib_regression_$bad_module"});
+		}
+	}
+
+	# avoid version number issues with test_ext7
+	if ($dbnames{contrib_regression_test_extensions})
+	{
+		_add_st(
+			$result,
+			'contrib_regression_test_extensions',
+			'drop extension if exists test_ext7');
+	}
+
+	# stuff not supported from release 16
+	if ($old_version >= 12 && $old_version < 16)
+	{
+		# Can't upgrade aclitem in user tables from pre 16 to 16+.
+		_add_st($result, 'regression',
+			'alter table public.tab_core_types drop column aclitem');
+		# Can't handle child tables with locally-generated columns.
+		_add_st(
+			$result, 'regression',
+			'drop table public.gtest_normal_child',
+			'drop table public.gtest_normal_child2');
+	}
+
+	# stuff not supported from release 14
+	if ($old_version < 14)
+	{
+		# postfix operators (some don't exist in very old versions)
+		_add_st(
+			$result,
+			'regression',
+			'drop operator #@# (bigint,NONE)',
+			'drop operator #%# (bigint,NONE)',
+			'drop operator if exists !=- (bigint,NONE)',
+			'drop operator if exists #@%# (bigint,NONE)');
+
+		# get rid of dblink's dependencies on regress.so
+		my $regrdb =
+		  $old_version le '9.4'
+		  ? 'contrib_regression'
+		  : 'contrib_regression_dblink';
+
+		if ($dbnames{$regrdb})
+		{
+			_add_st(
+				$result, $regrdb,
+				'drop function if exists public.putenv(text)',
+				'drop function if exists public.wait_pid(integer)');
+		}
+	}
+
+	# user table OIDs are gone from release 12 on
+	if ($old_version < 12)
+	{
+		my $nooid_stmt = q{
+           DO $stmt$
+           DECLARE
+              rec text;
+           BEGIN
+              FOR rec in
+                 select oid::regclass::text
+                 from pg_class
+                 where relname !~ '^pg_'
+                    and relhasoids
+                    and relkind in ('r','m')
+                 order by 1
+              LOOP
+                 execute 'ALTER TABLE ' || rec || ' SET WITHOUT OIDS';
+                 RAISE NOTICE 'removing oids from table %', rec;
+              END LOOP;
+           END; $stmt$;
+        };
+
+		foreach my $oiddb ('regression', 'contrib_regression_btree_gist')
+		{
+			next unless $dbnames{$oiddb};
+			_add_st($result, $oiddb, $nooid_stmt);
+		}
+
+		# this table had OIDs too, but we'll just drop it
+		if ($old_version >= 10 && $dbnames{'contrib_regression_postgres_fdw'})
+		{
+			_add_st(
+				$result,
+				'contrib_regression_postgres_fdw',
+				'drop foreign table ft_pg_type');
+		}
+	}
+
+	# abstime+friends are gone from release 12 on; but these tables
+	# might or might not be present depending on regression test vintage
+	if ($old_version < 12)
+	{
+		_add_st($result, 'regression',
+			'drop table if exists abstime_tbl, reltime_tbl, tinterval_tbl');
+	}
+
+	# some regression functions gone from release 11 on
+	if ($old_version < 11)
+	{
+		_add_st(
+			$result, 'regression',
+			'drop function if exists public.boxarea(box)',
+			'drop function if exists public.funny_dup17()');
+	}
+
+	# version-0 C functions are no longer supported
+	if ($old_version < 10)
+	{
+		_add_st($result, 'regression',
+			'drop function oldstyle_length(integer, text)');
+	}
+
+	if ($old_version lt '9.5')
+	{
+		# cope with changes of underlying functions
+		_add_st(
+			$result,
+			'regression',
+			'drop operator @#@ (NONE, bigint)',
+			'CREATE OPERATOR @#@ ('
+			  . 'PROCEDURE = factorial, RIGHTARG = bigint )',
+			'drop aggregate public.array_cat_accum(anyarray)',
+			'CREATE AGGREGATE array_larger_accum (anyarray) ' . ' ( '
+			  . '   sfunc = array_larger, '
+			  . '   stype = anyarray, '
+			  . '   initcond = $${}$$ ' . '  ) ');
+
+		# "=>" is no longer valid as an operator name
+		_add_st($result, 'regression',
+			'drop operator if exists public.=> (bigint, NONE)');
+	}
+
+	return $result;
+}
+
+# Internal subroutine to add statement(s) to the list for the given db.
+sub _add_st
+{
+	my ($result, $db, @st) = @_;
+
+	$result->{$db} ||= [];
+	push(@{ $result->{$db} }, @st);
+}
+
+=pod
+
+=item adjust_old_dumpfile($old_version, $dump)
+
+Edit a dump output file, taken from the adjusted old-version installation
+by current-version C<pg_dumpall -s>, so that it will match the results of
+C<pg_dumpall -s> on the pg_upgrade'd installation.
+
+Typically this involves coping with cosmetic differences in the output
+of backend subroutines used by pg_dump.
+
+Arguments:
+
+=over
+
+=item C<old_version>: Branch we are upgrading from.  This can be a branch
+name such as 'HEAD' or 'REL_11_STABLE', but it can also be any string
+that PostgreSQL::Version accepts.
+
+=item C<dump>: Contents of dump file
+
+=back
+
+Returns the modified dump text.
+
+=cut
+
+sub adjust_old_dumpfile
+{
+	my ($old_version, $dump) = @_;
+
+	# nothing to do for non-cross-version tests
+	return $dump if $old_version eq 'HEAD';
+
+	# convert branch name to numeric form
+	$old_version =~ s/REL_?(\d+(?:_\d+)?)_STABLE/$1/;
+	$old_version =~ s/_/./;
+	$old_version = PostgreSQL::Version->new($old_version);
+
+	# use Unix newlines
+
+	$dump =~ s/\r\n/\n/g;
+
+	# Version comments will certainly not match.
+	$dump =~ s/^-- Dumped from database version.*\n//mg;
+
+	if ($old_version >= 14 && $old_version < 16)
+	{
+		# Fix up some privilege-set discrepancies.
+		$dump =~
+		  s {^REVOKE SELECT,INSERT,REFERENCES,DELETE,TRIGGER,TRUNCATE,UPDATE ON TABLE}
+			{REVOKE ALL ON TABLE}mg;
+		$dump =~
+		  s {^(GRANT SELECT,INSERT,REFERENCES,TRIGGER,TRUNCATE),UPDATE ON TABLE}
+			{$1,MAINTAIN,UPDATE ON TABLE}mg;
+	}
+
+	if ($old_version < 14)
+	{
+		# Remove mentions of extended hash functions.
+		$dump =~ s {(^\s+OPERATOR\s1\s=\((?:integer,integer|text,text)\))\s,\n
+                    \s+FUNCTION\s2\s.*?public.part_hash.*?;}
+				   {$1;}mxg;
+	}
+
+	# Change trigger definitions to say ... EXECUTE FUNCTION ...
+	if ($old_version < 12)
+	{
+		# would like to use lookbehind here but perl complains
+		# so do it this way
+		$dump =~ s/
+			(^CREATE\sTRIGGER\s.*?)
+			\sEXECUTE\sPROCEDURE
+			/$1 EXECUTE FUNCTION/mgx;
+	}
+
+	if ($old_version lt '9.6')
+	{
+		# adjust some places where we don't print so many parens anymore
+
+		my $prefix =
+			"'New York'\tnew & york | big & apple | nyc\t'new' & 'york'\t";
+		my $orig = "( 'new' & 'york' | 'big' & 'appl' ) | 'nyc'";
+		my $repl = "'new' & 'york' | 'big' & 'appl' | 'nyc'";
+		$dump =~ s/(?<=^\Q$prefix\E)\Q$orig\E/$repl/mg;
+
+		$prefix = "'Sanct Peter'\tPeterburg | peter | 'Sanct Peterburg'\t'sanct' & 'peter'\t";
+		$orig = "( 'peterburg' | 'peter' ) | 'sanct' & 'peterburg'";
+		$repl = "'peterburg' | 'peter' | 'sanct' & 'peterburg'";
+		$dump =~ s/(?<=^\Q$prefix\E)\Q$orig\E/$repl/mg;
+	}
+
+	if ($old_version lt '9.5')
+	{
+		# adjust some places where we don't print so many parens anymore
+
+		my $prefix = "CONSTRAINT (?:sequence|copy)_con CHECK [(][(]";
+		my $orig = "((x > 3) AND (y <> 'check failed'::text))";
+		my $repl = "(x > 3) AND (y <> 'check failed'::text)";
+		$dump =~ s/($prefix)\Q$orig\E/$1$repl/mg;
+
+		$prefix = "CONSTRAINT insert_con CHECK";
+		$orig = "((x >= 3) AND (y <> 'check failed'::text))";
+		$repl = "(x >= 3) AND (y <> 'check failed'::text)";
+		$dump =~ s/($prefix.*?)\Q$orig\E/$1$repl/mg;
+
+		$orig = "DEFAULT ((-1) * currval('public.insert_seq'::regclass))";
+		$repl = "DEFAULT ('-1'::integer * currval('public.insert_seq'::regclass))";
+		$dump =~ s/\Q$orig\E/$repl/mg;
+
+		my $expr =
+		  "(rsl.sl_color = rsh.slcolor) AND (rsl.sl_len_cm >= rsh.slminlen_cm)";
+		$dump =~ s/WHERE \(\(\Q$expr\E\)/WHERE ($expr/g;
+
+		$expr = "(rule_and_refint_t3.id3a = new.id3a) AND (rule_and_refint_t3.id3b = new.id3b)";
+		$dump =~ s/WHERE \(\(\Q$expr\E\)/WHERE ($expr/g;
+
+		$expr = "(rule_and_refint_t3_1.id3a = new.id3a) AND (rule_and_refint_t3_1.id3b = new.id3b)";
+		$dump =~ s/WHERE \(\(\Q$expr\E\)/WHERE ($expr/g;
+	}
+
+	if ($old_version lt '9.3')
+	{
+		# CREATE VIEW/RULE statements were not pretty-printed before 9.3.
+		# To cope, reduce all whitespace sequences within them to one space.
+		# This must be done on both old and new dumps.
+		$dump = _mash_view_whitespace($dump);
+
+		# _mash_view_whitespace doesn't handle multi-command rules;
+		# rather than trying to fix that, just hack the exceptions manually.
+
+		my $prefix = "CREATE RULE rtest_sys_del AS ON DELETE TO public.rtest_system DO (DELETE FROM public.rtest_interface WHERE (rtest_interface.sysname = old.sysname);";
+		my $line2 = " DELETE FROM public.rtest_admin";
+		my $line3 = " WHERE (rtest_admin.sysname = old.sysname);";
+		$dump =~ s/(?<=\Q$prefix\E)\Q$line2$line3\E \);/\n$line2\n $line3\n);/mg;
+
+		$prefix = "CREATE RULE rtest_sys_upd AS ON UPDATE TO public.rtest_system DO (UPDATE public.rtest_interface SET sysname = new.sysname WHERE (rtest_interface.sysname = old.sysname);";
+		$line2 = " UPDATE public.rtest_admin SET sysname = new.sysname";
+		$line3 = " WHERE (rtest_admin.sysname = old.sysname);";
+		$dump =~ s/(?<=\Q$prefix\E)\Q$line2$line3\E \);/\n$line2\n $line3\n);/mg;
+
+		# and there's one place where pre-9.3 uses less aliases than we do now
+
+		# add the alias to the subquery
+		$dump =~ s/(^CREATE\sRULE\srule_and_refint_t3_ins\s.*?)
+                   (SELECT\s1\sFROM\spublic\.rule_and_refint_t3)
+                   (\sWHERE)
+                  /$1$2 rule_and_refint_t3_1$3/mx;
+
+		# and use it in the subquery (g flag doesn't work here)
+		for (1..3)
+		{
+			$dump =~ s/(^CREATE\sRULE\srule_and_refint_t3_ins.*?)
+					   rule_and_refint_t3\.id3([abc])\s=\snew
+					   (.*?DO\sINSTEAD)
+					  /$1rule_and_refint_t3_1.id3$2 = new$3/mx;
+		}
+
+		# Also fix old use of NATURAL JOIN syntax
+		$dump =~
+		  s {NATURAL JOIN public\.credit_card r}
+			{JOIN public.credit_card r USING (cid)}mg;
+		$dump =~
+		  s {NATURAL JOIN public\.credit_usage r}
+			{JOIN public.credit_usage r USING (cid)}mg;
+	}
+
+	# Suppress blank lines, as some places in pg_dump emit more or fewer.
+	$dump =~ s/\n\n+/\n/g;
+
+	return $dump;
+}
+
+# Internal subroutine to mangle whitespace within view/rule commands.
+# Any consecutive sequence of whitespace is reduced to one space.
+sub _mash_view_whitespace
+{
+	my ($dump) = @_;
+
+	foreach my $leader ('CREATE VIEW', 'CREATE RULE')
+	{
+		my @splitchunks = split $leader, $dump;
+
+		$dump = shift(@splitchunks);
+		foreach my $chunk (@splitchunks)
+		{
+			my @thischunks = split /;/, $chunk, 2;
+			my $stmt = shift(@thischunks);
+
+			# now $stmt is just the body of the CREATE VIEW/RULE
+			$stmt =~ s/\s+/ /sg;
+			# we also need to smash these forms for sub-selects and rules
+			$stmt =~ s/\( SELECT/(SELECT/g;
+			$stmt =~ s/\( INSERT/(INSERT/g;
+			$stmt =~ s/\( UPDATE/(UPDATE/g;
+			$stmt =~ s/\( DELETE/(DELETE/g;
+
+			$dump .= $leader . $stmt . ';' . $thischunks[0];
+		}
+	}
+	return $dump;
+}
+
+=pod
+
+=item adjust_new_dumpfile($old_version, $dump)
+
+Edit a dump output file, taken from the pg_upgrade'd installation
+by current-version C<pg_dumpall -s>, so that it will match the old
+dump output file as adjusted by C<adjust_old_dumpfile>.
+
+Typically this involves deleting data not present in the old installation.
+
+Arguments:
+
+=over
+
+=item C<old_version>: Branch we are upgrading from.  This can be a branch
+name such as 'HEAD' or 'REL_11_STABLE', but it can also be any string
+that PostgreSQL::Version accepts.
+
+=item C<dump>: Contents of dump file
+
+=back
+
+Returns the modified dump text.
+
+=cut
+
+sub adjust_new_dumpfile
+{
+	my ($old_version, $dump) = @_;
+
+	# nothing to do for non-cross-version tests
+	return $dump if $old_version eq 'HEAD';
+
+	# convert branch name to numeric form
+	$old_version =~ s/REL_?(\d+(?:_\d+)?)_STABLE/$1/;
+	$old_version =~ s/_/./;
+	$old_version = PostgreSQL::Version->new($old_version);
+
+	# use Unix newlines
+
+	$dump =~ s/\r\n/\n/g;
+
+	# Version comments will certainly not match.
+	$dump =~ s/^-- Dumped from database version.*\n//mg;
+
+	if ($old_version < 14)
+	{
+		# Suppress noise-word uses of IN in CREATE/ALTER PROCEDURE.
+		$dump =~ s/^(CREATE PROCEDURE .*?)\(IN /$1(/mg;
+		$dump =~ s/^(ALTER PROCEDURE .*?)\(IN /$1(/mg;
+		$dump =~ s/^(CREATE PROCEDURE .*?), IN /$1, /mg;
+		$dump =~ s/^(ALTER PROCEDURE .*?), IN /$1, /mg;
+		$dump =~ s/^(CREATE PROCEDURE .*?), IN /$1, /mg;
+		$dump =~ s/^(ALTER PROCEDURE .*?), IN /$1, /mg;
+
+		# Remove SUBSCRIPT clauses in CREATE TYPE.
+		$dump =~ s/^\s+SUBSCRIPT = raw_array_subscript_handler,\n//mg;
+
+		# Remove multirange_type_name clauses in CREATE TYPE AS RANGE.
+		$dump =~ s {,\n\s+multirange_type_name = .*?(,?)$} {$1}mg;
+
+		# Remove mentions of extended hash functions.
+		my $line1 =
+		  "ALTER OPERATOR FAMILY public[.]part_test_.*?_ops USING hash ADD";
+		my $line2 = "FUNCTION 2 .*? public\.part_hash.*?[(].*?[)];";
+		$dump =~ s {^$line1\n\s+$line2} {}mg;
+	}
+
+	# pre-v12 dumps will not say anything about default_table_access_method.
+	if ($old_version < 12)
+	{
+		$dump =~ s/^SET default_table_access_method = heap;\n//mg;
+	}
+
+	# dumps from pre-9.6 dblink may include redundant ACL settings
+	if ($old_version lt '9.6')
+	{
+		my $comment =
+		  "-- Name: FUNCTION dblink_connect_u[(].*?[)]; Type: ACL; Schema: public; Owner: .*";
+		my $sql = "REVOKE ALL ON FUNCTION public[.]dblink_connect_u[(].*?[)] FROM PUBLIC;";
+		$dump =~
+		  s/^--\n$comment\n--\n+$sql\n+--\n$comment\n--\n+$sql\n//mg;
+	}
+
+	if ($old_version lt '9.3')
+	{
+		# CREATE VIEW/RULE statements were not pretty-printed before 9.3.
+		# To cope, reduce all whitespace sequences within them to one space.
+		# This must be done on both old and new dumps.
+		$dump = _mash_view_whitespace($dump);
+	}
+
+	# Suppress blank lines, as some places in pg_dump emit more or fewer.
+	$dump =~ s/\n\n+/\n/g;
+
+	return $dump;
+}
+
+=pod
+
+=back
+
+=cut
+
+1;

Reply via email to