On Tue, Nov 27, 2018 at 02:09:05PM +0100, Michael Banck wrote:
> I had a quick look at fixing this but did not manage to immediately come
> up with a solution, so posting here for now.
If you look at another thread, the patch posted at the top of it would actually solve this issue:
https://www.postgresql.org/message-id/20181021134206.ga14...@paquier.xyz

Your problem could also be solved with the minimalistic patch attached, which fixes along the way the problems with temporary files present in PGDATA; something like the attached could be used... Given the stale status of the other thread, I am unsure what should be done though.
--
Michael
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index f0e09bea20..148aa511f6 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -21,6 +21,7 @@ #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/checksum_impl.h" +#include "storage/fd.h" static int64 files = 0; @@ -189,9 +190,22 @@ scan_directory(const char *basedir, const char *subdir) char fn[MAXPGPATH]; struct stat st; - if (!isRelFileName(de->d_name)) + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) continue; + /* Skip temporary files */ + if (strncmp(de->d_name, + PG_TEMP_FILE_PREFIX, + strlen(PG_TEMP_FILE_PREFIX)) == 0) + continue; + + /* Skip temporary folders */ + if (strncmp(de->d_name, + PG_TEMP_FILES_DIR, + strlen(PG_TEMP_FILES_DIR)) == 0) + return; + snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name); if (lstat(fn, &st) < 0) { @@ -206,6 +220,13 @@ scan_directory(const char *basedir, const char *subdir) *segmentpath; BlockNumber segmentno = 0; + /* + * Only normal relation files can be analyzed. Note that this + * skips temporary relations. + */ + if (!isRelFileName(de->d_name)) + continue; + /* * Cut off at the segment boundary (".") to get the segment number * in order to mix it into the checksum. Then also cut off at the diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl index 0e1725d9f2..fd64de050e 100644 --- a/src/bin/pg_verify_checksums/t/002_actions.pl +++ b/src/bin/pg_verify_checksums/t/002_actions.pl @@ -5,7 +5,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 36; +use Test::More tests => 42; # Initialize node with checksums enabled. 
my $node = get_new_node('node_checksum'); @@ -54,31 +54,7 @@ command_fails(['pg_verify_checksums', '-D', $pgdata], "fails with online cluster"); # Create table to corrupt and get its relfilenode -$node->safe_psql('postgres', - "SELECT a INTO corrupt1 FROM generate_series(1,10000) AS a; - ALTER TABLE corrupt1 SET (autovacuum_enabled=false);"); - -my $file_corrupted = $node->safe_psql('postgres', - "SELECT pg_relation_filepath('corrupt1')"); -my $relfilenode_corrupted = $node->safe_psql('postgres', - "SELECT relfilenode FROM pg_class WHERE relname = 'corrupt1';"); - -# Set page header and block size -my $pageheader_size = 24; -my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); -$node->stop; - -# Checksums are correct for single relfilenode as the table is not -# corrupted yet. -command_ok(['pg_verify_checksums', '-D', $pgdata, - '-r', $relfilenode_corrupted], - "succeeds for single relfilenode with offline cluster"); - -# Time to create some corruption -open my $file, '+<', "$pgdata/$file_corrupted"; -seek($file, $pageheader_size, 0); -syswrite($file, '\0\0\0\0\0\0\0\0\0'); -close $file; +my $relfilenode_corrupted = create_corruption($node, 'corrupt1', 'pg_default'); # Global checksum checks fail $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata], @@ -95,6 +71,72 @@ $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r', [qr/checksum verification failed/], 'fails for corrupted data on single relfilenode'); +# Drop corrupt table again and make sure there is no more corruption +$node->start; +$node->safe_psql('postgres', 'DROP TABLE corrupt1;'); +$node->stop; +$node->command_ok(['pg_verify_checksums', '-D', $pgdata], + 'succeeds again: '.$node->data_dir); + +# Create table to corrupt in a non-default tablespace and get its relfilenode +my $tablespace_dir = $node->data_dir."/../ts_corrupt_dir"; +mkdir ($tablespace_dir); +$node->start; +$node->safe_psql('postgres', "CREATE TABLESPACE ts_corrupt LOCATION 
'".$tablespace_dir."';"); +$relfilenode_corrupted = create_corruption($node, 'corrupt2', 'ts_corrupt'); +$node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata], + 1, + [qr/Bad checksums:.*1/], + [qr/checksum verification failed/], + 'fails with corrupted data in non-default tablespace'); + +# Drop corrupt table again and make sure there is no more corruption +$node->start; +$node->safe_psql('postgres', 'DROP TABLE corrupt2;'); +$node->stop; +$node->command_ok(['pg_verify_checksums', '-D', $pgdata], + 'succeeds again'); + +# Utility routine to create a table with corrupted checksums. +# It stops the node (if running), and starts it again. +sub create_corruption +{ + my $node = shift; + my $table = shift; + my $tablespace = shift; + + $node->safe_psql('postgres', + "SELECT a INTO ".$table." FROM generate_series(1,10000) AS a; + ALTER TABLE ".$table." SET (autovacuum_enabled=false);"); + + $node->safe_psql('postgres', + "ALTER TABLE ".$table." SET TABLESPACE ".$tablespace.";"); + + my $file_corrupted = $node->safe_psql('postgres', + "SELECT pg_relation_filepath('".$table."');"); + my $relfilenode_corrupted = $node->safe_psql('postgres', + "SELECT relfilenode FROM pg_class WHERE relname = '".$table."';"); + + # Set page header and block size + my $pageheader_size = 24; + my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); + $node->stop; + + # Checksums are correct for single relfilenode as the table is not + # corrupted yet. + command_ok(['pg_verify_checksums', '-D', $pgdata, + '-r', $relfilenode_corrupted], + "succeeds for single relfilenode with offline cluster"); + + # Time to create some corruption + open my $file, '+<', "$pgdata/$file_corrupted"; + seek($file, $pageheader_size, 0); + syswrite($file, '\0\0\0\0\0\0\0\0\0'); + close $file; + + return $relfilenode_corrupted; +} + # Utility routine to check that pg_verify_checksums is able to detect # correctly-named relation files filled with some corrupted data. sub fail_corrupt
signature.asc
Description: PGP signature