On Fri, Oct 12, 2018 at 12:11:58PM +0900, Michael Paquier wrote:
> Agreed.  I am just working on a patch for v11- which uses a
> whitelist-based method instead of what is present now.  Reverting the
> tests to put the buildfarm to green could be done, but that's not the
> root of the problem.

So, I have coded this thing, and finish with the attached.  The
following file patterns are accepted for the checksums:
<digits>.<segment>
<digits>_<forkname>
<digits>_<forkname>.<segment>
I have added some tests on the way to make sure that all the patterns
get covered.  Please note that this skips the temporary files.

Thoughts?
--
Michael
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index 1bc020ab6c..81abb021d9 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -49,26 +49,64 @@ usage(void)
 	printf(_("Report bugs to <pgsql-b...@postgresql.org>.\n"));
 }
 
-static const char *const skip[] = {
-	"pg_control",
-	"pg_filenode.map",
-	"pg_internal.init",
-	"PG_VERSION",
-	NULL,
-};
-
 static bool
 skipfile(const char *fn)
 {
-	const char *const *f;
+	int			pos;
+	int			savepos;
 
 	if (strcmp(fn, ".") == 0 ||
 		strcmp(fn, "..") == 0)
 		return true;
 
-	for (f = skip; *f; f++)
-		if (strcmp(*f, fn) == 0)
+	/*----------
+	 * Only files including data checksums are authorized for scanning.  This
+	 * is guessed based on the file name, by reverse-engineering
+	 * GetRelationPath so make sure to update both code paths if any updates
+	 * are done. The following file names are allowed: <digits>
+	 * <digits>.<segment>
+	 * <digits>_<forkname>
+	 * <digits>_<forkname>.<segment>
+	 *
+	 * Note that temporary files, beginning with 't', are also skipped.
+	 *
+	 *----------
+	 */
+
+	/* A non-empty string of digits should follow */
+	for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos)
+		;
+	/* good to go if only digits */
+	if (fn[pos] == '\0')
+		return false;
+	/* leave if no digits */
+	if (pos == 0)
+		return true;
+
+	/* VM and FSM files can be scanned */
+	if (fn[pos] == '_')
+	{
+		if (strncmp(fn + pos + 1, "vm", 2) == 0)
+			pos += 3;
+		else if (strncmp(fn + pos + 1, "fsm", 3) == 0)
+			pos += 4;
+		else
 			return true;
+	}
+
+	/* Check for an optional segment number */
+	if (fn[pos] == '.')
+	{
+		savepos = pos + 1;		/* count the dot */
+		/* This should use only digits */
+		for (pos = savepos; isdigit((unsigned char) fn[pos]); ++pos)
+			;
+		if (fn[pos] != '\0')
+			return true;
+	}
+
+	if (fn[pos] != '\0')
+		return true;
 	return false;
 }
 
diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl
index dc29da09af..6ad1d019f1 100644
--- a/src/bin/pg_verify_checksums/t/002_actions.pl
+++ b/src/bin/pg_verify_checksums/t/002_actions.pl
@@ -17,6 +17,25 @@ command_like(['pg_controldata', $pgdata],
 	     qr/Data page checksum version:.*1/,
 		 'checksums enabled in control file');
 
+# Add set of dummy files with some contents.  These should not be scanned
+# by the tool.
+append_to_file "$pgdata/global/123.12t", "foo";
+append_to_file "$pgdata/global/foo", "foo2";
+append_to_file "$pgdata/global/t123", "bar";
+append_to_file "$pgdata/global/123a", "bar2";
+append_to_file "$pgdata/global/.123", "foobar";
+append_to_file "$pgdata/global/_fsm", "foobar2";
+append_to_file "$pgdata/global/_vm.123", "foohoge";
+append_to_file "$pgdata/global/123_vm.123t", "foohoge2";
+
+# Those are correct but empty files, so they should pass through.
+append_to_file "$pgdata/global/99999", "";
+append_to_file "$pgdata/global/99999.123", "";
+append_to_file "$pgdata/global/99999_vm", "";
+append_to_file "$pgdata/global/99999_fsm", "";
+append_to_file "$pgdata/global/99999_vm.123", "";
+append_to_file "$pgdata/global/99999_fsm.123", "";
+
 # Checksums pass on a newly-created cluster
 command_ok(['pg_verify_checksums',  '-D', $pgdata],
 		   "succeeds with offline cluster");

Attachment: signature.asc
Description: PGP signature

Reply via email to