On Sun, Oct 14, 2018 at 10:24:55AM -0400, Andrew Dunstan wrote: > [snip]
Thanks a lot for the review, Andrew! > This code now seems redundant: > > if (strcmp(fn, ".") == 0 || > strcmp(fn, "..") == 0) > return true; Indeed. I have renamed skipfile() to isRelFileName on the way, which looks better in the context. > I would probably reverse the order of these two tests. It might not make any > difference, assuming fn is never an empty string, but it seems more logical > to me. > > + /* good to go if only digits */ > + if (fn[pos] == '\0') > + return false; > + /* leave if no digits */ > + if (pos == 0) > + return true; No objections to that. Done. > It also looks to me like the check for a segment number doesn't ensure > there is at least one digit, so "123." might pass, but I could be > wrong. In any case, there isn't a test for that, and there probably > should be. You are right here. This name pattern has been failing. Fixed in the attached. I have added "123_" and "123_." as extra patterns to check. What do you think about the updated version attached? -- Michael
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index 1bc020ab6c..666ab3f21e 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -15,6 +15,7 @@ #include "catalog/pg_control.h" #include "common/controldata_utils.h" +#include "common/relpath.h" #include "getopt_long.h" #include "pg_getopt.h" #include "storage/bufpage.h" @@ -49,27 +50,69 @@ usage(void) printf(_("Report bugs to <pgsql-b...@postgresql.org>.\n")); } -static const char *const skip[] = { - "pg_control", - "pg_filenode.map", - "pg_internal.init", - "PG_VERSION", - NULL, -}; - +/* + * isRelFileName + * + * Check if the given file name has a shape authorized for scanning. + */ static bool -skipfile(const char *fn) +isRelFileName(const char *fn) { - const char *const *f; + int pos; - if (strcmp(fn, ".") == 0 || - strcmp(fn, "..") == 0) + /*---------- + * Only files including data checksums are authorized for scanning. This + * is guessed based on the file name, by reverse-engineering + * GetRelationPath so make sure to update both code paths if any updates + * are done. The following file names are allowed: + * <digits> + * <digits>.<segment> + * <digits>_<forkname> + * <digits>_<forkname>.<segment> + * + * Note that temporary files, beginning with 't', are also skipped. + * + *---------- + */ + + /* A non-empty string of digits should follow */ + for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos) + ; + /* leave if no digits */ + if (pos == 0) + return false; + /* good to go if only digits */ + if (fn[pos] == '\0') return true; - for (f = skip; *f; f++) - if (strcmp(*f, fn) == 0) - return true; - return false; + /* Authorized fork files can be scanned */ + if (fn[pos] == '_') + { + int forkchar = forkname_chars(&fn[pos + 1], NULL); + + if (forkchar <= 0) + return false; + + pos += forkchar + 1; + } + + /* Check for an optional segment number */ + if (fn[pos] == '.') + { + int segchar; + + for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar) + ; + + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now this should be the end */ + if (fn[pos] != '\0') + return false; + return true; } static void @@ -146,7 +189,7 @@ scan_directory(const char *basedir, const char *subdir) char fn[MAXPGPATH]; struct stat st; - if (skipfile(de->d_name)) + if (!isRelFileName(de->d_name)) continue; snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name); diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl index dc29da09af..8442534e95 100644 --- a/src/bin/pg_verify_checksums/t/002_actions.pl +++ b/src/bin/pg_verify_checksums/t/002_actions.pl @@ -5,7 +5,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 12; +use Test::More tests => 36; # Initialize node with checksums enabled. my $node = get_new_node('node_checksum'); @@ -17,6 +17,31 @@ command_like(['pg_controldata', $pgdata], qr/Data page checksum version:.*1/, 'checksums enabled in control file'); +# Add set of dummy files with some contents. These should not be scanned +# by the tool. +append_to_file "$pgdata/global/123.", "foo"; +append_to_file "$pgdata/global/123_", "foo"; +append_to_file "$pgdata/global/123_.", "foo"; +append_to_file "$pgdata/global/123.12t", "foo"; +append_to_file "$pgdata/global/foo", "foo2"; +append_to_file "$pgdata/global/t123", "bar"; +append_to_file "$pgdata/global/123a", "bar2"; +append_to_file "$pgdata/global/.123", "foobar"; +append_to_file "$pgdata/global/_fsm", "foobar2"; +append_to_file "$pgdata/global/_init", "foobar3"; +append_to_file "$pgdata/global/_vm.123", "foohoge"; +append_to_file "$pgdata/global/123_vm.123t", "foohoge2"; + +# Those are correct but empty files, so they should pass through. +append_to_file "$pgdata/global/99999", ""; +append_to_file "$pgdata/global/99999.123", ""; +append_to_file "$pgdata/global/99999_fsm", ""; +append_to_file "$pgdata/global/99999_init", ""; +append_to_file "$pgdata/global/99999_vm", ""; +append_to_file "$pgdata/global/99999_init.123", ""; +append_to_file "$pgdata/global/99999_fsm.123", ""; +append_to_file "$pgdata/global/99999_vm.123", ""; + # Checksums pass on a newly-created cluster command_ok(['pg_verify_checksums', '-D', $pgdata], "succeeds with offline cluster"); @@ -67,3 +92,33 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r', [qr/Bad checksums:.*1/], [qr/checksum verification failed/], 'fails for corrupted data on single relfilenode'); + +# Utility routine to check that pg_verify_checksums is correctly +# able to detect correctly-named relation files with some corrupted +# data. +sub fail_corrupt +{ + my $node = shift; + my $file = shift; + my $pgdata = $node->data_dir; + + # Create the file with some data in it. + append_to_file "$pgdata/global/$file", "foo"; + + $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata], + 1, + [qr/^$/], + [qr/could not read block/], + "fails for corrupted data in $file"); +} + +# Authorized relation files filled with corrupted data cause the +# checksum checks to fail. +fail_corrupt($node, "99999"); +fail_corrupt($node, "99999.123"); +fail_corrupt($node, "99999_fsm"); +fail_corrupt($node, "99999_init"); +fail_corrupt($node, "99999_vm"); +fail_corrupt($node, "99999_init.123"); +fail_corrupt($node, "99999_fsm.123"); +fail_corrupt($node, "99999_vm.123");
signature.asc
Description: PGP signature