On Sun, Oct 14, 2018 at 10:24:55AM -0400, Andrew Dunstan wrote:
> [snip]

Thanks a lot for the review, Andrew!

> This code now seems redundant:
> 
>          if (strcmp(fn, ".") == 0 ||
>              strcmp(fn, "..") == 0)
>              return true;

Indeed.  I have renamed skipfile() to isRelFileName on the way, which
looks better in the context.

> I would probably reverse the order of these two tests. It might not make any
> difference, assuming fn is never an empty string, but it seems more logical
> to me.
> 
>    +    /* good to go if only digits */
>    +    if (fn[pos] == '\0')
>    +        return false;
>    +    /* leave if no digits */
>    +    if (pos == 0)
>    +        return true;

No objections to that.  Done.

> It also looks to me like the check for a segment number doesn't ensure
> there is at least one digit, so "123." might pass, but I could be
> wrong. In any case, there isn't a test for that, and there probably
> should be.

You are right here.  This name pattern has been failing.  Fixed in the
attached.  I have added "123_" and "123_." as extra patterns to check.

What do you think about the updated version attached?
--
Michael
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index 1bc020ab6c..666ab3f21e 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -15,6 +15,7 @@
 
 #include "catalog/pg_control.h"
 #include "common/controldata_utils.h"
+#include "common/relpath.h"
 #include "getopt_long.h"
 #include "pg_getopt.h"
 #include "storage/bufpage.h"
@@ -49,27 +50,69 @@ usage(void)
 	printf(_("Report bugs to <pgsql-b...@postgresql.org>.\n"));
 }
 
-static const char *const skip[] = {
-	"pg_control",
-	"pg_filenode.map",
-	"pg_internal.init",
-	"PG_VERSION",
-	NULL,
-};
-
+/*
+ * isRelFileName
+ *
+ * Check if the given file name has a shape authorized for scanning.
+ */
 static bool
-skipfile(const char *fn)
+isRelFileName(const char *fn)
 {
-	const char *const *f;
+	int			pos;
 
-	if (strcmp(fn, ".") == 0 ||
-		strcmp(fn, "..") == 0)
+	/*----------
+	 * Only files including data checksums are authorized for scanning.  This
+	 * is guessed based on the file name, by reverse-engineering
+	 * GetRelationPath so make sure to update both code paths if any updates
+	 * are done. The following file names are allowed:
+	 * <digits>
+	 * <digits>.<segment>
+	 * <digits>_<forkname>
+	 * <digits>_<forkname>.<segment>
+	 *
+	 * Note that temporary files, beginning with 't', are also skipped.
+	 *
+	 *----------
+	 */
+
+	/* A non-empty string of digits should follow */
+	for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos)
+		;
+	/* leave if no digits */
+	if (pos == 0)
+		return false;
+	/* good to go if only digits */
+	if (fn[pos] == '\0')
 		return true;
 
-	for (f = skip; *f; f++)
-		if (strcmp(*f, fn) == 0)
-			return true;
-	return false;
+	/* Authorized fork files can be scanned */
+	if (fn[pos] == '_')
+	{
+		int			forkchar = forkname_chars(&fn[pos + 1], NULL);
+
+		if (forkchar <= 0)
+			return false;
+
+		pos += forkchar + 1;
+	}
+
+	/* Check for an optional segment number */
+	if (fn[pos] == '.')
+	{
+		int			segchar;
+
+		for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar)
+			;
+
+		if (segchar <= 1)
+			return false;
+		pos += segchar;
+	}
+
+	/* Now this should be the end */
+	if (fn[pos] != '\0')
+		return false;
+	return true;
 }
 
 static void
@@ -146,7 +189,7 @@ scan_directory(const char *basedir, const char *subdir)
 		char		fn[MAXPGPATH];
 		struct stat st;
 
-		if (skipfile(de->d_name))
+		if (!isRelFileName(de->d_name))
 			continue;
 
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl
index dc29da09af..8442534e95 100644
--- a/src/bin/pg_verify_checksums/t/002_actions.pl
+++ b/src/bin/pg_verify_checksums/t/002_actions.pl
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 12;
+use Test::More tests => 36;
 
 # Initialize node with checksums enabled.
 my $node = get_new_node('node_checksum');
@@ -17,6 +17,31 @@ command_like(['pg_controldata', $pgdata],
 	     qr/Data page checksum version:.*1/,
 		 'checksums enabled in control file');
 
+# Add set of dummy files with some contents.  These should not be scanned
+# by the tool.
+append_to_file "$pgdata/global/123.", "foo";
+append_to_file "$pgdata/global/123_", "foo";
+append_to_file "$pgdata/global/123_.", "foo";
+append_to_file "$pgdata/global/123.12t", "foo";
+append_to_file "$pgdata/global/foo", "foo2";
+append_to_file "$pgdata/global/t123", "bar";
+append_to_file "$pgdata/global/123a", "bar2";
+append_to_file "$pgdata/global/.123", "foobar";
+append_to_file "$pgdata/global/_fsm", "foobar2";
+append_to_file "$pgdata/global/_init", "foobar3";
+append_to_file "$pgdata/global/_vm.123", "foohoge";
+append_to_file "$pgdata/global/123_vm.123t", "foohoge2";
+
+# Those are correct but empty files, so they should pass through.
+append_to_file "$pgdata/global/99999", "";
+append_to_file "$pgdata/global/99999.123", "";
+append_to_file "$pgdata/global/99999_fsm", "";
+append_to_file "$pgdata/global/99999_init", "";
+append_to_file "$pgdata/global/99999_vm", "";
+append_to_file "$pgdata/global/99999_init.123", "";
+append_to_file "$pgdata/global/99999_fsm.123", "";
+append_to_file "$pgdata/global/99999_vm.123", "";
+
 # Checksums pass on a newly-created cluster
 command_ok(['pg_verify_checksums',  '-D', $pgdata],
 		   "succeeds with offline cluster");
@@ -67,3 +92,33 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r',
 						  [qr/Bad checksums:.*1/],
 						  [qr/checksum verification failed/],
 						  'fails for corrupted data on single relfilenode');
+
+# Utility routine to check that pg_verify_checksums is correctly
+# able to detect correctly-named relation files with some corrupted
+# data.
+sub fail_corrupt
+{
+	my $node = shift;
+	my $file = shift;
+	my $pgdata = $node->data_dir;
+
+	# Create the file with some data in it.
+	append_to_file "$pgdata/global/$file", "foo";
+
+	$node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata],
+						  1,
+						  [qr/^$/],
+						  [qr/could not read block/],
+						  "fails for corrupted data in $file");
+}
+
+# Authorized relation files filled with corrupted data cause the
+# checksum checks to fail.
+fail_corrupt($node, "99999");
+fail_corrupt($node, "99999.123");
+fail_corrupt($node, "99999_fsm");
+fail_corrupt($node, "99999_init");
+fail_corrupt($node, "99999_vm");
+fail_corrupt($node, "99999_init.123");
+fail_corrupt($node, "99999_fsm.123");
+fail_corrupt($node, "99999_vm.123");

Attachment: signature.asc
Description: PGP signature

Reply via email to