Hi Fabien,

On Thu, Oct 25, 2018 at 10:16:03AM +0200, Fabien COELHO wrote:
> >New version 5 attached.
> 
> Patch does not seem to apply anymore.

Thanks, rebased version attached.

> Moreover, ISTM that some discussions about behavioral changes are not fully
> settled.
> 
> My current opinion is that when offline some errors are not admissible,
> whereas the same errors are admissible when online because they may be due
> to the ongoing database processing, so the behavior should not be strictly
> the same.

Indeed, the recently-added pg_verify_checksums testsuite adds a few
files with just 'foo' in them and with V5 of the patch,
pg_verify_checksums no longer bails out with an error on those.

I have now re-added the retry logic for partially-read pages, so that it
bails out if it reads a page partially twice. This makes the testsuite
work again.

I am not convinced we need to differentiate further between online and
offline operation, can you explain in more detail which other
differences are ok in online mode and why?
 
> This might suggest some option to tell the command that it should work in
> online or offline mode, so that it may be stricter in some cases. The
> default may be one of the option, eg the stricter offline mode, or maybe
> guessed at startup.

If we believe the operation should be different, the patch removes the
"is cluster online?" check (as it is no longer necessary), so we could
just replace the current error message with a global variable with the
result of that check and use it where needed (if any).


Michael

-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer

Unser Umgang mit personenbezogenen Daten unterliegt
folgenden Bestimmungen: https://www.credativ.de/datenschutz
diff --git a/doc/src/sgml/ref/pg_verify_checksums.sgml b/doc/src/sgml/ref/pg_verify_checksums.sgml
index 905b8f1222..4ad6edcde6 100644
--- a/doc/src/sgml/ref/pg_verify_checksums.sgml
+++ b/doc/src/sgml/ref/pg_verify_checksums.sgml
@@ -37,9 +37,8 @@ PostgreSQL documentation
   <title>Description</title>
   <para>
    <command>pg_verify_checksums</command> verifies data checksums in a
-   <productname>PostgreSQL</productname> cluster.  The server must be shut
-   down cleanly before running <application>pg_verify_checksums</application>.
-   The exit status is zero if there are no checksum errors, otherwise nonzero.
+   <productname>PostgreSQL</productname> cluster.  The exit status is zero if
+   there are no checksum errors, otherwise nonzero.  
   </para>
  </refsect1>
 
diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c
index f0e09bea20..0958631a7a 100644
--- a/src/bin/pg_verify_checksums/pg_verify_checksums.c
+++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c
@@ -1,7 +1,7 @@
 /*
  * pg_verify_checksums
  *
- * Verifies page level checksums in an offline cluster
+ * Verifies page level checksums in a cluster
  *
  *	Copyright (c) 2010-2018, PostgreSQL Global Development Group
  *
@@ -26,7 +26,9 @@
 static int64 files = 0;
 static int64 blocks = 0;
 static int64 badblocks = 0;
+static int64 skippedblocks = 0;
 static ControlFileData *ControlFile;
+static XLogRecPtr checkpointLSN;
 
 static char *only_relfilenode = NULL;
 static bool verbose = false;
@@ -122,10 +124,17 @@ scan_file(const char *fn, BlockNumber segmentno)
 	PageHeader	header = (PageHeader) buf.data;
 	int			f;
 	BlockNumber blockno;
+	bool		block_retry = false;
 
 	f = open(fn, O_RDONLY | PG_BINARY, 0);
 	if (f < 0)
 	{
+		if (errno == ENOENT)
+		{
+			/* File was removed in the meantime */
+			return;
+		}
+
 		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
 				progname, fn, strerror(errno));
 		exit(1);
@@ -140,26 +149,107 @@ scan_file(const char *fn, BlockNumber segmentno)
 
 		if (r == 0)
 			break;
+		if (r < 0)
+		{
+			fprintf(stderr, _("%s: could not read block %u in file \"%s\": %s\n"),
+					progname, blockno, fn, strerror(errno));
+			return;
+		}
 		if (r != BLCKSZ)
 		{
-			fprintf(stderr, _("%s: could not read block %u in file \"%s\": read %d of %d\n"),
-					progname, blockno, fn, r, BLCKSZ);
-			exit(1);
+			if (block_retry)
+			{
+				/* We already tried once to reread the block, bail out */
+				fprintf(stderr, _("%s: could not read block %u in file \"%s\": read %d of %d\n"),
+						progname, blockno, fn, r, BLCKSZ);
+				exit(1);
+			}
+
+			/*
+			 * Retry the block. It's possible that we read the block while it
+			 * was extended or shrinked, so it it ends up looking torn to us.
+			 */
+
+			/*
+			 * Seek back by the amount of bytes we read to the beginning of
+			 * the failed block.
+			 */
+			if (lseek(f, -r, SEEK_CUR) == -1)
+			{
+				fprintf(stderr, _("%s: could not lseek in file \"%s\": %m\n"),
+						progname, fn);
+				exit(1);
+			}
+
+			/* Set flag so we know a retry was attempted */
+			block_retry = true;
+
+			/* Reset loop to validate the block again */
+			blockno--;
+
+			continue;
 		}
-		blocks++;
 
 		/* New pages have no checksum yet */
 		if (PageIsNew(header))
+		{
+			skippedblocks++;
 			continue;
+		}
+
+		blocks++;
 
 		csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
 		if (csum != header->pd_checksum)
 		{
+			/*
+			 * Retry the block on the first failure.  It's
+			 * possible that we read the first 4K page of
+			 * the block just before postgres updated the
+			 * entire block so it ends up looking torn to
+			 * us.  We only need to retry once because the
+			 * LSN should be updated to something we can
+			 * ignore on the next pass.  If the error
+			 * happens again then it is a true validation
+			 * failure.
+			 */
+			if (!block_retry)
+			{
+				/* Seek to the beginning of the failed block */
+				if (lseek(f, -BLCKSZ, SEEK_CUR) == -1)
+				{
+					fprintf(stderr, _("%s: could not lseek in file \"%s\": %m\n"),
+							progname, fn);
+					exit(1);
+				}
+
+				/* Set flag so we know a retry was attempted */
+				block_retry = true;
+
+				/* Reset loop to validate the block again */
+				blockno--;
+
+				continue;
+			}
+
+			/* The checksum verification failed on retry as well.
+			 * Check if the page has been modified since the
+			 * checkpoint and skip it in this case.
+			 */
+			if (PageGetLSN(buf.data) > checkpointLSN)
+			{
+				block_retry = false;
+				blocks--;
+				skippedblocks++;
+				continue;
+			}
+
 			if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
 				fprintf(stderr, _("%s: checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X\n"),
 						progname, fn, blockno, csum, header->pd_checksum);
 			badblocks++;
 		}
+		block_retry = false;
 	}
 
 	if (verbose)
@@ -195,6 +285,12 @@ scan_directory(const char *basedir, const char *subdir)
 		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
 		if (lstat(fn, &st) < 0)
 		{
+			if (errno == ENOENT)
+			{
+				/* File was removed in the meantime */
+				continue;
+			}
+
 			fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
 					progname, fn, strerror(errno));
 			exit(1);
@@ -328,7 +424,7 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
-	/* Check if cluster is running */
+	/* Check if checksums are enabled */
 	ControlFile = get_controlfile(DataDir, progname, &crc_ok);
 	if (!crc_ok)
 	{
@@ -336,19 +432,15 @@ main(int argc, char *argv[])
 		exit(1);
 	}
 
-	if (ControlFile->state != DB_SHUTDOWNED &&
-		ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
-	{
-		fprintf(stderr, _("%s: cluster must be shut down to verify checksums\n"), progname);
-		exit(1);
-	}
-
 	if (ControlFile->data_checksum_version == 0)
 	{
 		fprintf(stderr, _("%s: data checksums are not enabled in cluster\n"), progname);
 		exit(1);
 	}
 
+	/* Get checkpoint LSN */
+	checkpointLSN = ControlFile->checkPoint;
+
 	/* Scan all files */
 	scan_directory(DataDir, "global");
 	scan_directory(DataDir, "base");
@@ -358,6 +450,8 @@ main(int argc, char *argv[])
 	printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version);
 	printf(_("Files scanned:  %s\n"), psprintf(INT64_FORMAT, files));
 	printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks));
+	if (skippedblocks > 0)
+		printf(_("Blocks skipped: %s\n"), psprintf(INT64_FORMAT, skippedblocks));
 	printf(_("Bad checksums:  %s\n"), psprintf(INT64_FORMAT, badblocks));
 
 	if (badblocks > 0)
diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl
index d59970b735..91adc7fef1 100644
--- a/src/bin/pg_verify_checksums/t/002_actions.pl
+++ b/src/bin/pg_verify_checksums/t/002_actions.pl
@@ -48,10 +48,10 @@ append_to_file "$pgdata/global/99999_vm.123", "";
 command_ok(['pg_verify_checksums',  '-D', $pgdata],
 		   "succeeds with offline cluster");
 
-# Checks cannot happen with an online cluster
+# Checksums pass on an online cluster
 $node->start;
-command_fails(['pg_verify_checksums',  '-D', $pgdata],
-			  "fails with online cluster");
+command_ok(['pg_verify_checksums',  '-D', $pgdata],
+		   "succeeds with online cluster");
 
 # Create table to corrupt and get its relfilenode
 $node->safe_psql('postgres',

Reply via email to