On Fri, Oct 23, 2020 at 08:00:08AM +0900, Michael Paquier wrote:
> Yeah, we could try to make the logic a bit more complicated like
> that.  However, for any code path relying on a page read without any
> locking guarantee, we cannot really trust any of the fields stored in
> the page, as these could just be random corruption garbage.  The only
> thing I am ready to trust here is a checksum mismatch check, because
> the checksum is the only field on the page that is tied to the full
> contents of the 8kB page.  This also keeps the code simpler.

A small update here.  I have extracted the refactored part for
PageIsVerified() and committed it as that's independently useful.
This makes the patch proposed here simpler on HEAD, leading to the
attached.
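
For readers catching up on the thread, the attached patch makes sendFile()
rely on that routine instead of the previous ad-hoc LSN-and-checksum test.
Below is a rough, standalone sketch of what the verification amounts to, as
I understand PageIsVerifiedExtended(); the types, block size and checksum
routine are simplified stand-ins and not the backend code:

/*
 * Standalone sketch, not PostgreSQL source: roughly the shape of the checks
 * done when verifying an on-disk page, as I understand the now-committed
 * PageIsVerifiedExtended().  The struct layout, block size and checksum
 * routine are simplified stand-ins, not the backend definitions.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define BLKSZ 8192

typedef struct
{
	uint64_t	lsn;			/* stand-in for pd_lsn */
	uint16_t	checksum;		/* stand-in for pd_checksum */
	uint16_t	flags;			/* stand-in for pd_flags */
	uint16_t	lower;			/* stand-in for pd_lower */
	uint16_t	upper;			/* stand-in for pd_upper */
	uint16_t	special;		/* stand-in for pd_special */
} SketchPageHeader;

/* Toy stand-in for pg_checksum_page(): sum the data past the header. */
static uint16_t
sketch_checksum(const char *page, uint32_t blkno)
{
	uint32_t	sum = blkno;

	for (size_t i = sizeof(SketchPageHeader); i < BLKSZ; i++)
		sum = sum * 31 + (uint8_t) page[i];
	return (uint16_t) (sum ^ (sum >> 16));
}

static bool
sketch_page_is_verified(const char *page, uint32_t blkno)
{
	const SketchPageHeader *hdr = (const SketchPageHeader *) page;

	/*
	 * Only pages that do not look "new" (upper == 0) carry a checksum and
	 * a meaningful header, so only those get the header/checksum checks.
	 */
	if (hdr->upper != 0)
	{
		bool		header_sane;
		bool		checksum_ok;

		checksum_ok = (sketch_checksum(page, blkno) == hdr->checksum);
		header_sane = (hdr->lower <= hdr->upper &&
					   hdr->upper <= hdr->special &&
					   hdr->special <= BLKSZ);

		if (header_sane && checksum_ok)
			return true;
	}

	/*
	 * Anything else is acceptable only if the page is completely zeroed,
	 * which is how a freshly extended page looks before it gets a
	 * checksum.  A page whose header was zeroed but that still contains
	 * data fails here, which is what the new TAP test below exercises.
	 */
	for (size_t i = 0; i < BLKSZ; i++)
	{
		if (page[i] != 0)
			return false;
	}
	return true;
}
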
--
Michael
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index b89df01fa7..37b60437a6 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -1569,21 +1569,24 @@ sendFile(const char *readfilename, const char *tarfilename,
 {
 	int			fd;
 	BlockNumber blkno = 0;
-	bool		block_retry = false;
+	int			block_attempts = 0;
 	char		buf[TAR_SEND_SIZE];
-	uint16		checksum;
 	int			checksum_failures = 0;
 	off_t		cnt;
 	int			i;
 	pgoff_t		len = 0;
 	char	   *page;
 	size_t		pad;
-	PageHeader	phdr;
 	int			segmentno = 0;
 	char	   *segmentpath;
 	bool		verify_checksum = false;
 	pg_checksum_context checksum_ctx;
 
+	/* Maximum number of checksum failures reported for this file */
+#define CHECKSUM_REPORT_THRESHOLD		5
+	/* Maximum number of retries done for a single page */
+#define PAGE_RETRY_THRESHOLD			5
+
 	pg_checksum_init(&checksum_ctx, manifest->checksum_type);
 
 	fd = OpenTransientFile(readfilename, O_RDONLY | PG_BINARY);
@@ -1661,9 +1664,7 @@ sendFile(const char *readfilename, const char *tarfilename,
 		if (verify_checksum && (cnt % BLCKSZ != 0))
 		{
 			ereport(WARNING,
-					(errmsg("could not verify checksum in file \"%s\", block "
-							"%d: read buffer size %d and page size %d "
-							"differ",
+					(errmsg("could not verify checksum in file \"%s\", block %u: read buffer size %d and page size %d differ",
 							readfilename, blkno, (int) cnt, BLCKSZ)));
 			verify_checksum = false;
 		}
@@ -1675,78 +1676,75 @@ sendFile(const char *readfilename, const char *tarfilename,
 				page = buf + BLCKSZ * i;
 
 				/*
-				 * Only check pages which have not been modified since the
-				 * start of the base backup. Otherwise, they might have been
-				 * written only halfway and the checksum would not be valid.
-				 * However, replaying WAL would reinstate the correct page in
-				 * this case. We also skip completely new pages, since they
-				 * don't have a checksum yet.
+				 * Check on-disk pages with the same set of verification
+				 * conditions used before loading pages into shared buffers.
+				 * Note that PageIsVerifiedExtended() considers pages filled
+				 * only with zeros as valid, since they don't have a checksum
+				 * yet.  Failures are not reported to pgstat here; they are
+				 * reported in a single batch once a file is completed.  A
+				 * page could be written halfway through while this check
+				 * runs, causing a false positive.  If that happens, the block
+				 * is re-read and re-verified a limited number of times, and a
+				 * failure is reported only once all retries are exhausted.
 				 */
-				if (!PageIsNew(page) && PageGetLSN(page) < startptr)
+				if (PageIsVerifiedExtended(page, blkno, 0))
 				{
-					checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
-					phdr = (PageHeader) page;
-					if (phdr->pd_checksum != checksum)
+					/* success, move on to the next block */
+					blkno++;
+					block_attempts = 0;
+					continue;
+				}
+
+				/* Verification of this page failed, retry it if attempts remain */
+				if (block_attempts < PAGE_RETRY_THRESHOLD)
+				{
+					int			reread_cnt;
+
+					/* Reread the failed block */
+					reread_cnt =
+						basebackup_read_file(fd, buf + BLCKSZ * i,
+											 BLCKSZ, len + BLCKSZ * i,
+											 readfilename,
+											 false);
+					if (reread_cnt == 0)
 					{
 						/*
-						 * Retry the block on the first failure.  It's
-						 * possible that we read the first 4K page of the
-						 * block just before postgres updated the entire block
-						 * so it ends up looking torn to us.  We only need to
-						 * retry once because the LSN should be updated to
-						 * something we can ignore on the next pass.  If the
-						 * error happens again then it is a true validation
-						 * failure.
+						 * If we hit end-of-file, a concurrent truncation must
+						 * have occurred, so break out of this loop just as if
+						 * the initial fread() returned 0. We'll drop through
+						 * to the same code that handles that case. (We must
+						 * fix up cnt first, though.)
 						 */
-						if (block_retry == false)
-						{
-							int			reread_cnt;
-
-							/* Reread the failed block */
-							reread_cnt =
-								basebackup_read_file(fd, buf + BLCKSZ * i,
-													 BLCKSZ, len + BLCKSZ * i,
-													 readfilename,
-													 false);
-							if (reread_cnt == 0)
-							{
-								/*
-								 * If we hit end-of-file, a concurrent
-								 * truncation must have occurred, so break out
-								 * of this loop just as if the initial fread()
-								 * returned 0. We'll drop through to the same
-								 * code that handles that case. (We must fix
-								 * up cnt first, though.)
-								 */
-								cnt = BLCKSZ * i;
-								break;
-							}
-
-							/* Set flag so we know a retry was attempted */
-							block_retry = true;
-
-							/* Reset loop to validate the block again */
-							i--;
-							continue;
-						}
-
-						checksum_failures++;
-
-						if (checksum_failures <= 5)
-							ereport(WARNING,
-									(errmsg("checksum verification failed in "
-											"file \"%s\", block %d: calculated "
-											"%X but expected %X",
-											readfilename, blkno, checksum,
-											phdr->pd_checksum)));
-						if (checksum_failures == 5)
-							ereport(WARNING,
-									(errmsg("further checksum verification "
-											"failures in file \"%s\" will not "
-											"be reported", readfilename)));
+						cnt = BLCKSZ * i;
+						break;
 					}
+
+					/* Count the retry attempt */
+					block_attempts++;
+
+					/*
+					 * Wait for 100 ms to give any concurrent page write that
+					 * may have caused this failure some time to complete.
+					 */
+					pg_usleep(100000L);
+
+					/* Reset loop to validate the block again */
+					i--;
+					continue;
 				}
-				block_retry = false;
+
+				checksum_failures++;
+				if (checksum_failures <= CHECKSUM_REPORT_THRESHOLD)
+					ereport(WARNING,
+							(errmsg("checksum verification failed in block %u of file \"%s\"",
+									blkno, readfilename)));
+				if (checksum_failures == CHECKSUM_REPORT_THRESHOLD)
+					ereport(WARNING,
+							(errmsg("further checksum verification failures in file \"%s\" will not be reported",
+									readfilename)));
+
+				/* Move on to the next block */
+				block_attempts = 0;
 				blkno++;
 			}
 		}
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index f674a7c94e..a46a9ece42 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -6,7 +6,7 @@ use File::Basename qw(basename dirname);
 use File::Path qw(rmtree);
 use PostgresNode;
 use TestLib;
-use Test::More tests => 109;
+use Test::More tests => 112;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -528,6 +528,24 @@ $node->command_checks_all(
 	'pg_basebackup reports checksum mismatch');
 rmtree("$tempdir/backup_corrupt");
 
+# Create a new type of corruption and zero out one page header
+# completely.
+system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
+open $file, '+<', "$pgdata/$file_corrupt1";
+seek($file, 0, 0);
+my $zero_data = "\0" x $pageheader_size;
+syswrite($file, $zero_data);
+close $file;
+system_or_bail 'pg_ctl', '-D', $pgdata, 'start';
+
+$node->command_checks_all(
+	[ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1a" ],
+	1,
+	[qr{^$}],
+	[qr/^WARNING.*checksum verification failed/s],
+	"pg_basebackup reports checksum mismatch for zeroed page headers");
+rmtree("$tempdir/backup_corrupt1a");
+
 # induce further corruption in 5 more blocks
 system_or_bail 'pg_ctl', '-D', $pgdata, 'stop';
 open $file, '+<', "$pgdata/$file_corrupt1";

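A closing note on the retry logic in sendFile() above, for anyone reading
the patch out of context: a block that fails verification is re-read a
bounded number of times with a short pause in between, so that a write
caught halfway has time to complete, and the block is counted as a checksum
failure only once the retries are exhausted.  A minimal, standalone
illustration of that pattern, using plain POSIX I/O and a caller-supplied
verifier rather than the backend code, could look like this:

/*
 * Standalone sketch of the bounded retry used per block in the patch
 * above: re-read a block a fixed number of times, sleeping briefly
 * between attempts, and treat it as a failure only once all retries are
 * exhausted.  Plain POSIX I/O with a caller-supplied verifier; the block
 * size and threshold only mirror the values used in the patch.
 */
#include <stdbool.h>
#include <sys/types.h>
#include <unistd.h>

#define BLOCK_SIZE		8192
#define RETRY_THRESHOLD	5

typedef bool (*verify_fn) (const char *block, unsigned blkno);

static bool
read_and_verify_block(int fd, unsigned blkno, verify_fn verify)
{
	char		block[BLOCK_SIZE];

	for (int attempt = 0; attempt <= RETRY_THRESHOLD; attempt++)
	{
		ssize_t		nread = pread(fd, block, BLOCK_SIZE,
								  (off_t) blkno * BLOCK_SIZE);

		if (nread == 0)
			return true;		/* concurrent truncation, not a failure */
		if (nread == BLOCK_SIZE && verify(block, blkno))
			return true;		/* block verified, move on */

		/*
		 * The block may have been caught in the middle of a concurrent
		 * write, so give that write some time to complete before the
		 * next attempt.
		 */
		if (attempt < RETRY_THRESHOLD)
			usleep(100 * 1000);
	}
	return false;				/* genuine verification failure */
}

As in the patch, a zero-length re-read is treated as a concurrent
truncation rather than a corruption.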
