On Tue, Nov 27, 2018 at 06:27:57PM -0500, Stephen Frost wrote:
> * Michael Paquier (mich...@paquier.xyz) wrote:
>> Believe me or not, but we have spent so much energy on this stuff that I
>> am ready to give up on the whitelist patch and focus on other business.
> 
> I would have hoped that you'd see why I was concerned from the start
> about this now that we have a released version of pg_verify_checksums
> in 11.1 that is paper-bag-worthy.

That's unrelated.  Let's be clear, I still don't like the fact that we
don't use a whitelist approach, per the arguments raised upthread.  The
tool also clearly lacked testing and coverage from the beginning and has
never been able to work on Windows, which was a very bad decision.
Still we need to live with that and I take my share of the blame,
willing to make this stuff work better for all.

>> - skipfile should be called after making sure that we work on a file.
> 
> It's not clear to me what this is referring to, exactly...?  Can you
> clarify?

Please see 0002 attached, which moves the call to skipfile() where I
think it should go.

>> - it is necessary to exclude EXEC_BACKEND files.
> 
> Agreed, they should be added to the exclude list.

Base backups are impacted as well as this causes spurious warnings.

Attached are two patches to fix all the mess:
- 0001 is a revert of the whitelist, minus the set of regression tests
checking after corrupted files and empty files.
- 0002 is a fix for all the issues reported on this thread, with tests
added (including the tablespace test from Michael Banck):
-- Base backups gain EXEC_BACKEND files in their warning filters.
-- pg_verify_checksums gains the same files.
-- temporary files are filtered out.
-- pg_verify_checksums performs filtering checks only on regular files,
not on paths.

0001 and 0002 need to be merged as 0001 would cause the buildfarm to
turn red on Windows if applied alone.  Can you know see my point?
--
Michael
From ad07e7c4ef9c40e2fc13ff59bb51a071ab69ebbe Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Wed, 28 Nov 2018 09:32:44 +0900
Subject: [PATCH 1/2] Switch pg_verify_checksums back to a blacklist

This basically reverts commit d55241af705667d4503638e3f77d3689fd6be31,
leaving around a portion of the regression tests still adapted with
empty relation files, and corrupted cases.
---
 .../pg_verify_checksums/pg_verify_checksums.c | 77 ++++---------------
 src/bin/pg_verify_checksums/t/002_actions.pl  | 17 ----
 2 files changed, 17 insertions(+), 77 deletions(-)

diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index f0e09bea20..1bc020ab6c 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -15,7 +15,6 @@
 
 #include "catalog/pg_control.h"
 #include "common/controldata_utils.h"
-#include "common/relpath.h"
 #include "getopt_long.h"
 #include "pg_getopt.h"
 #include "storage/bufpage.h"
@@ -50,69 +49,27 @@ usage(void)
 	printf(_("Report bugs to <pgsql-b...@postgresql.org>.\n"));
 }
 
-/*
- * isRelFileName
- *
- * Check if the given file name is authorized for checksum verification.
- */
+static const char *const skip[] = {
+	"pg_control",
+	"pg_filenode.map",
+	"pg_internal.init",
+	"PG_VERSION",
+	NULL,
+};
+
 static bool
-isRelFileName(const char *fn)
+skipfile(const char *fn)
 {
-	int			pos;
+	const char *const *f;
 
-	/*----------
-	 * Only files including data checksums are authorized for verification.
-	 * This is guessed based on the file name by reverse-engineering
-	 * GetRelationPath() so make sure to update both code paths if any
-	 * updates are done.  The following file name formats are allowed:
-	 * <digits>
-	 * <digits>.<segment>
-	 * <digits>_<forkname>
-	 * <digits>_<forkname>.<segment>
-	 *
-	 * Note that temporary files, beginning with 't', are also skipped.
-	 *
-	 *----------
-	 */
-
-	/* A non-empty string of digits should follow */
-	for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos)
-		;
-	/* leave if no digits */
-	if (pos == 0)
-		return false;
-	/* good to go if only digits */
-	if (fn[pos] == '\0')
+	if (strcmp(fn, ".") == 0 ||
+		strcmp(fn, "..") == 0)
 		return true;
 
-	/* Authorized fork files can be scanned */
-	if (fn[pos] == '_')
-	{
-		int			forkchar = forkname_chars(&fn[pos + 1], NULL);
-
-		if (forkchar <= 0)
-			return false;
-
-		pos += forkchar + 1;
-	}
-
-	/* Check for an optional segment number */
-	if (fn[pos] == '.')
-	{
-		int			segchar;
-
-		for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar)
-			;
-
-		if (segchar <= 1)
-			return false;
-		pos += segchar;
-	}
-
-	/* Now this should be the end */
-	if (fn[pos] != '\0')
-		return false;
-	return true;
+	for (f = skip; *f; f++)
+		if (strcmp(*f, fn) == 0)
+			return true;
+	return false;
 }
 
 static void
@@ -189,7 +146,7 @@ scan_directory(const char *basedir, const char *subdir)
 		char		fn[MAXPGPATH];
 		struct stat st;
 
-		if (!isRelFileName(de->d_name))
+		if (skipfile(de->d_name))
 			continue;
 
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl
index 0e1725d9f2..c640cce260 100644
--- a/src/bin/pg_verify_checksums/t/002_actions.pl
+++ b/src/bin/pg_verify_checksums/t/002_actions.pl
@@ -17,23 +17,6 @@ command_like(['pg_controldata', $pgdata],
 	     qr/Data page checksum version:.*1/,
 		 'checksums enabled in control file');
 
-# Add set of dummy files with some contents.  These should not be scanned
-# by the tool.
-
-# On Windows, file name "foo." == "foo", so skip that pattern there.
-append_to_file "$pgdata/global/123.", "foo" unless $windows_os;
-append_to_file "$pgdata/global/123_", "foo";
-append_to_file "$pgdata/global/123_.", "foo" unless $windows_os;;
-append_to_file "$pgdata/global/123.12t", "foo";
-append_to_file "$pgdata/global/foo", "foo2";
-append_to_file "$pgdata/global/t123", "bar";
-append_to_file "$pgdata/global/123a", "bar2";
-append_to_file "$pgdata/global/.123", "foobar";
-append_to_file "$pgdata/global/_fsm", "foobar2";
-append_to_file "$pgdata/global/_init", "foobar3";
-append_to_file "$pgdata/global/_vm.123", "foohoge";
-append_to_file "$pgdata/global/123_vm.123t", "foohoge2";
-
 # These are correct but empty files, so they should pass through.
 append_to_file "$pgdata/global/99999", "";
 append_to_file "$pgdata/global/99999.123", "";
-- 
2.19.1

From fbd412df74ff6ece7cc940668d4bdc25bd3ab77b Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Wed, 28 Nov 2018 09:51:49 +0900
Subject: [PATCH 2/2] Fix set of issues with pg_verify_checksums and base
 backups

Three issues are fixed in this patch:
- Base backups forgot to ignore files specific to EXEC_BACKEND, leading
to spurious warnings when checksums are enabled, per analysis from me.
- pg_verify_checksums forgot about files specific to EXEC_BACKEND,
leading to failures of the tool on any such build, particularly
Windows, error found by newly-introduced TAP tests, by various buildfarm
members.
- pg_verify_checksums forgot to count for temporary files and temporary
paths, which could be valid relation files, without checksums, per
report from Andres Freund.  Some tests are added to cover this case.

A new test case which emulates corruption for a file in a different
tablespace is added, coming from from Michael Banck, while I have coded
the main code.

Author: Michael Banck, Michael Paquier
Discussion: https://postgr.es/m/XXX
---
 src/backend/replication/basebackup.c          |   4 +
 .../pg_verify_checksums/pg_verify_checksums.c |  28 ++++-
 src/bin/pg_verify_checksums/t/002_actions.pl  | 100 +++++++++++++-----
 3 files changed, 101 insertions(+), 31 deletions(-)

diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index a7e3db2783..a7b7ab9d09 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -195,6 +195,10 @@ static const char *const noChecksumFiles[] = {
 	"pg_filenode.map",
 	"pg_internal.init",
 	"PG_VERSION",
+#ifdef EXEC_BACKEND
+	"config_exec_params",
+	"config_exec_params.new",
+#endif
 	NULL,
 };
 
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index 1bc020ab6c..5963053e00 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -20,6 +20,7 @@
 #include "storage/bufpage.h"
 #include "storage/checksum.h"
 #include "storage/checksum_impl.h"
+#include "storage/fd.h"
 
 
 static int64 files = 0;
@@ -54,6 +55,10 @@ static const char *const skip[] = {
 	"pg_filenode.map",
 	"pg_internal.init",
 	"PG_VERSION",
+#ifdef EXEC_BACKEND
+	"config_exec_params",
+	"config_exec_params.new",
+#endif
 	NULL,
 };
 
@@ -62,13 +67,10 @@ skipfile(const char *fn)
 {
 	const char *const *f;
 
-	if (strcmp(fn, ".") == 0 ||
-		strcmp(fn, "..") == 0)
-		return true;
-
 	for (f = skip; *f; f++)
 		if (strcmp(*f, fn) == 0)
 			return true;
+
 	return false;
 }
 
@@ -146,9 +148,22 @@ scan_directory(const char *basedir, const char *subdir)
 		char		fn[MAXPGPATH];
 		struct stat st;
 
-		if (skipfile(de->d_name))
+		if (strcmp(de->d_name, ".") == 0 ||
+			strcmp(de->d_name, "..") == 0)
 			continue;
 
+		/* Skip temporary files */
+		if (strncmp(de->d_name,
+					PG_TEMP_FILE_PREFIX,
+					strlen(PG_TEMP_FILE_PREFIX)) == 0)
+			continue;
+
+		/* Skip temporary folders */
+		if (strncmp(de->d_name,
+					PG_TEMP_FILES_DIR,
+					strlen(PG_TEMP_FILES_DIR)) == 0)
+			return;
+
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
 		if (lstat(fn, &st) < 0)
 		{
@@ -163,6 +178,9 @@ scan_directory(const char *basedir, const char *subdir)
 					   *segmentpath;
 			BlockNumber segmentno = 0;
 
+			if (skipfile(de->d_name))
+				continue;
+
 			/*
 			 * Cut off at the segment boundary (".") to get the segment number
 			 * in order to mix it into the checksum. Then also cut off at the
diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl
index c640cce260..bea8701fe2 100644
--- a/src/bin/pg_verify_checksums/t/002_actions.pl
+++ b/src/bin/pg_verify_checksums/t/002_actions.pl
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 36;
+use Test::More tests => 42;
 
 # Initialize node with checksums enabled.
 my $node = get_new_node('node_checksum');
@@ -27,6 +27,12 @@ append_to_file "$pgdata/global/99999_init.123", "";
 append_to_file "$pgdata/global/99999_fsm.123", "";
 append_to_file "$pgdata/global/99999_vm.123", "";
 
+# There are temporary files and folders with dummy contents, which
+# should be ignored by the scan.
+append_to_file "$pgdata/global/pgsql_tmp_123", "foo";
+mkdir "$pgdata/global/pgsql_tmp";
+append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo";
+
 # Checksums pass on a newly-created cluster
 command_ok(['pg_verify_checksums',  '-D', $pgdata],
 		   "succeeds with offline cluster");
@@ -37,31 +43,7 @@ command_fails(['pg_verify_checksums',  '-D', $pgdata],
 			  "fails with online cluster");
 
 # Create table to corrupt and get its relfilenode
-$node->safe_psql('postgres',
-	"SELECT a INTO corrupt1 FROM generate_series(1,10000) AS a;
-	ALTER TABLE corrupt1 SET (autovacuum_enabled=false);");
-
-my $file_corrupted = $node->safe_psql('postgres',
-	"SELECT pg_relation_filepath('corrupt1')");
-my $relfilenode_corrupted =  $node->safe_psql('postgres',
-	"SELECT relfilenode FROM pg_class WHERE relname = 'corrupt1';");
-
-# Set page header and block size
-my $pageheader_size = 24;
-my $block_size = $node->safe_psql('postgres', 'SHOW block_size;');
-$node->stop;
-
-# Checksums are correct for single relfilenode as the table is not
-# corrupted yet.
-command_ok(['pg_verify_checksums',  '-D', $pgdata,
-	'-r', $relfilenode_corrupted],
-	"succeeds for single relfilenode with offline cluster");
-
-# Time to create some corruption
-open my $file, '+<', "$pgdata/$file_corrupted";
-seek($file, $pageheader_size, 0);
-syswrite($file, '\0\0\0\0\0\0\0\0\0');
-close $file;
+my $relfilenode_corrupted = create_corruption($node, 'corrupt1', 'pg_default');
 
 # Global checksum checks fail
 $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
@@ -78,6 +60,72 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r',
 						  [qr/checksum verification failed/],
 						  'fails for corrupted data on single relfilenode');
 
+# Drop corrupt table again and make sure there is no more corruption
+$node->start;
+$node->safe_psql('postgres', 'DROP TABLE corrupt1;');
+$node->stop;
+$node->command_ok(['pg_verify_checksums', '-D', $pgdata],
+        'succeeds again: '.$node->data_dir);
+
+# Create table to corrupt in a non-default tablespace and get its relfilenode
+my $tablespace_dir = $node->data_dir."/../ts_corrupt_dir";
+mkdir ($tablespace_dir);
+$node->start;
+$node->safe_psql('postgres', "CREATE TABLESPACE ts_corrupt LOCATION '".$tablespace_dir."';");
+$relfilenode_corrupted = create_corruption($node, 'corrupt2', 'ts_corrupt');
+$node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
+						  1,
+						  [qr/Bad checksums:.*1/],
+						  [qr/checksum verification failed/],
+						  'fails with corrupted data in non-default tablespace');
+
+# Drop corrupt table again and make sure there is no more corruption
+$node->start;
+$node->safe_psql('postgres', 'DROP TABLE corrupt2;');
+$node->stop;
+$node->command_ok(['pg_verify_checksums', '-D', $pgdata],
+        'succeeds again');
+
+# Utility routine to create a table with corrupted checksums.
+# It stops the node (if running), and starts it again.
+sub create_corruption
+{
+	my $node = shift;
+	my $table = shift;
+	my $tablespace = shift;
+
+	$node->safe_psql('postgres',
+		"SELECT a INTO ".$table." FROM generate_series(1,10000) AS a;
+		ALTER TABLE ".$table." SET (autovacuum_enabled=false);");
+
+	$node->safe_psql('postgres',
+		"ALTER TABLE ".$table." SET TABLESPACE ".$tablespace.";");
+
+	my $file_corrupted = $node->safe_psql('postgres',
+		"SELECT pg_relation_filepath('".$table."');");
+	my $relfilenode_corrupted =  $node->safe_psql('postgres',
+		"SELECT relfilenode FROM pg_class WHERE relname = '".$table."';");
+
+	# Set page header and block size
+	my $pageheader_size = 24;
+	my $block_size = $node->safe_psql('postgres', 'SHOW block_size;');
+	$node->stop;
+
+	# Checksums are correct for single relfilenode as the table is not
+	# corrupted yet.
+	command_ok(['pg_verify_checksums',  '-D', $pgdata,
+		'-r', $relfilenode_corrupted],
+		"succeeds for single relfilenode with offline cluster");
+
+	# Time to create some corruption
+	open my $file, '+<', "$pgdata/$file_corrupted";
+	seek($file, $pageheader_size, 0);
+	syswrite($file, '\0\0\0\0\0\0\0\0\0');
+	close $file;
+
+	return $relfilenode_corrupted;
+}
+
 # Utility routine to check that pg_verify_checksums is able to detect
 # correctly-named relation files filled with some corrupted data.
 sub fail_corrupt
-- 
2.19.1

Attachment: signature.asc
Description: PGP signature

Reply via email to