Hi,

some installations have data which is only rarerly read, and if they are
so large that dumps are not routinely taken, data corruption would only
be detected with some large delay even with checksums enabled.

The attached small patch verifies checksums (in case they are enabled)
during a basebackup. The rationale is that we are reading every block in
this case anyway, so this is a good opportunity to check them as well.
Other and complementary ways of checking the checksums are possible of
course, like the offline checking tool that Magnus just submitted.

It probably makes sense to use the same approach for determining the
segment numbers as Magnus did in his patch, or refactor that out in a
utility function, but I'm sick right now so wanted to submit this for
v11 first.

I did some light benchmarking and it seems that the performance
degradation is minimal, but this could well be platform or
architecture-dependent. Right now, the checksums are always checked but
maybe this could be made optional, probably by extending the replication
protocol.


Michael

-- 
Michael Banck
Projektleiter / Senior Berater
Tel.: +49 2166 9901-171
Fax:  +49 2166 9901-100
Email: michael.ba...@credativ.de

credativ GmbH, HRB Mönchengladbach 12080
USt-ID-Nummer: DE204566209
Trompeterallee 108, 41189 Mönchengladbach
Geschäftsführung: Dr. Michael Meskes, Jörg Folz, Sascha Heuer
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 185f32a5f9..841471cfce 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -16,6 +16,7 @@
 #include <unistd.h>
 #include <time.h>
 
+#include "access/xlog.h"
 #include "access/xlog_internal.h"	/* for pg_start/stop_backup */
 #include "catalog/catalog.h"
 #include "catalog/pg_type.h"
@@ -30,6 +31,8 @@
 #include "replication/basebackup.h"
 #include "replication/walsender.h"
 #include "replication/walsender_private.h"
+#include "storage/bufpage.h"
+#include "storage/checksum.h"
 #include "storage/dsm_impl.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
@@ -1199,10 +1202,18 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 		 bool missing_ok)
 {
 	FILE	   *fp;
+	BlockNumber blkno = 0;
 	char		buf[TAR_SEND_SIZE];
+	uint16		checksum;
 	size_t		cnt;
+	char	   *filename;
+	int			i;
 	pgoff_t		len = 0;
+	char		page[BLCKSZ];
 	size_t		pad;
+	PageHeader  phdr;
+	BlockNumber segno = 0;
+	bool		verify_checksum;
 
 	fp = AllocateFile(readfilename, "rb");
 	if (fp == NULL)
@@ -1214,10 +1225,54 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
 				 errmsg("could not open file \"%s\": %m", readfilename)));
 	}
 
+	/*
+	 * Verify checksums only when checksums are enabled, and the file to
+	 * send is either in one of the default tablespaces (`./global' or
+	 * `./base'), or in an external tablespace with an absolute pathname
+	 * (starting with `/').
+	 */
+	if (DataChecksumsEnabled() &&
+		(strncmp(readfilename, "./global/", 9) == 0 ||
+		strncmp(readfilename, "./base/", 7) == 0 ||
+		strncmp(readfilename, "/", 1) == 0))
+		verify_checksum = 1;
+	else
+		verify_checksum = 0;
+
 	_tarWriteHeader(tarfilename, NULL, statbuf, false);
 
+	/*
+	 * Determine segment number for possible checksum verification, as block
+	 * numbers are ongoing. The initial block number of a multi-segment file is
+	 * the segment number times RELSEG_SIZE.
+	 */
+	filename = basename(readfilename);
+	if (strstr(filename, "."))
+		segno = atoi(strstr(filename, ".")+1);
+	blkno = segno * RELSEG_SIZE;
+
 	while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
 	{
+		if (verify_checksum)
+		{
+			/*
+			 * The checksums are verified at block level, so we iterate over
+			 * the buffer in chunks of BLCKSZ.
+			 */
+			for (i = 0; i < cnt / BLCKSZ; i++)
+			{
+				memcpy(page, (buf + BLCKSZ * i), BLCKSZ);
+				checksum = pg_checksum_page((char *) page, blkno);
+				phdr = (PageHeader) page;
+				if (phdr->pd_checksum != checksum)
+					ereport(WARNING,
+							(errmsg("checksum mismatch in file \"%s\", block %d: "
+							"expected %x, found %x", readfilename, blkno,
+							checksum, phdr->pd_checksum)));
+				blkno++;
+			}
+		}
+
 		/* Send the chunk as a CopyData message */
 		if (pq_putmessage('d', buf, cnt))
 			ereport(ERROR,
diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
index cdf4f5be37..b410311791 100644
--- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl
+++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl
@@ -4,7 +4,7 @@ use Cwd;
 use Config;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 79;
+use Test::More tests => 83;
 
 program_help_ok('pg_basebackup');
 program_version_ok('pg_basebackup');
@@ -15,7 +15,7 @@ my $tempdir = TestLib::tempdir;
 my $node = get_new_node('main');
 
 # Initialize node without replication settings
-$node->init;
+$node->init(extra => [ '--data-checksums' ]);
 $node->start;
 my $pgdata = $node->data_dir;
 
@@ -317,3 +317,19 @@ like(
 	slurp_file("$tempdir/backupxs_sl_R/recovery.conf"),
 	qr/^primary_slot_name = 'slot1'\n/m,
 	'recovery.conf sets primary_slot_name');
+
+my $checksum = $node->safe_psql('postgres', 'SHOW data_checksums;');
+is($checksum, 'on', 'checksums are enabled');
+
+# induce corruption
+my $pg_class = $node->safe_psql('postgres',
+	q{SELECT pg_relation_filepath('pg_class')}
+);
+system_or_bail 'dd', 'conv=notrunc', 'oflag=seek_bytes', 'seek=4000', 'bs=9', 'count=1', 'if=/dev/zero', "of=$pgdata/$pg_class";
+
+$node->command_checks_all([ 'pg_basebackup', '-D', "$tempdir/backup_corrupt"],
+	0,
+	[qr{^$}],
+	[qr/^WARNING.*checksum.mismatch/s],
+	'pg_basebackup reports checksum mismatch'
+);

Reply via email to