Lately, PostgreSQL has moved many defaults from "bare minimum" more to
the "user friendly by default" side, e.g. hot_standby & replication in
the default configuration, parallelism, and generally higher defaults
for resource knobs like *_mem, autovacuum_* and so on.

I think the next step in that direction would be to enable data
checksums by default. They make sense in most setups, and people who
plan to run very performance-critical systems where checksums might be
too much need to tune many knobs anyway, and can just as well choose to
disable them manually, instead of everyone else having to enable
them manually. Also, disabling is much easier than enabling.

One argument against checksums used to be that we lacked tools to fix
problems with them. But ignore_checksum_failure and the pg_checksums
tool fix that.

The attached patch flips the default in initdb. It also adds a new
option -K --no-data-checksums that wasn't present previously. Docs are
updated to say what the new default is, and the testsuite exercises
the -K option.

Christoph
From cca069298f1cfc5c95e9e7dde08407a0c05f538a Mon Sep 17 00:00:00 2001
From: Christoph Berg <christoph.b...@credativ.de>
Date: Fri, 22 Mar 2019 14:55:32 +0100
Subject: [PATCH] Enable data checksums by default

---
 doc/src/sgml/config.sgml       |  4 ++--
 doc/src/sgml/ref/initdb.sgml   | 11 +++++++++++
 doc/src/sgml/wal.sgml          |  6 +++---
 src/bin/initdb/initdb.c        | 11 ++++++++---
 src/bin/initdb/t/001_initdb.pl | 22 ++++++++++++++++------
 5 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d383de2512..bfbe3afbb2 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2678,8 +2678,8 @@ include_dir 'conf.d'
        </para>
 
        <para>
-        If data checksums are enabled, hint bit updates are always WAL-logged
-        and this setting is ignored. You can use this setting to test how much
+        If data checksums are enabled (which is the default), hint bit updates are always WAL-logged
+        and this setting is ignored. If data checksums are disabled, you can use this setting to test how much
         extra WAL-logging would occur if your database had data checksums
         enabled.
        </para>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index 84fb37c293..c22f64ed95 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -219,6 +219,17 @@ PostgreSQL documentation
         may incur a noticeable performance penalty. This option can only
         be set during initialization, and cannot be changed later. If
         set, checksums are calculated for all objects, in all databases.
+        The default is to use data checksums.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="app-initdb-no-data-checksums" xreflabel="no data checksums">
+      <term><option>-K</option></term>
+      <term><option>--no-data-checksums</option></term>
+      <listitem>
+       <para>
+        Do not use checksums on data pages.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 4eb8feb903..2790e021e5 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -193,10 +193,10 @@
     </listitem>
     <listitem>
      <para>
-      Data pages are not currently checksummed by default, though full page images
+      Data pages are checksummed by default. Additionally, full page images
       recorded in WAL records will be protected; see <link
-      linkend="app-initdb-data-checksums"><application>initdb</application></link>
-      for details about enabling data page checksums.
+      linkend="app-initdb-no-data-checksums"><application>initdb</application></link>
+      for details about disabling data page checksums, which might be beneficial for performance.
      </para>
     </listitem>
     <listitem>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 4886090132..e7aef0b2c5 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -140,7 +140,7 @@ static bool noclean = false;
 static bool do_sync = true;
 static bool sync_only = false;
 static bool show_setting = false;
-static bool data_checksums = false;
+static bool data_checksums = true;
 static char *xlog_dir = NULL;
 static char *str_wal_segment_size_mb = NULL;
 static int	wal_segment_size_mb;
@@ -2408,7 +2408,8 @@ usage(const char *progname)
 	printf(_("      --wal-segsize=SIZE    size of WAL segments, in megabytes\n"));
 	printf(_("\nLess commonly used options:\n"));
 	printf(_("  -d, --debug               generate lots of debugging output\n"));
-	printf(_("  -k, --data-checksums      use data page checksums\n"));
+	printf(_("  -k, --data-checksums      use data page checksums (default)\n"));
+	printf(_("  -K, --no-data-checksums   do not use data page checksums\n"));
 	printf(_("  -L DIRECTORY              where to find the input files\n"));
 	printf(_("  -n, --no-clean            do not clean up after errors\n"));
 	printf(_("  -N, --no-sync             do not wait for changes to be written safely to disk\n"));
@@ -3100,6 +3101,7 @@ main(int argc, char *argv[])
 		{"waldir", required_argument, NULL, 'X'},
 		{"wal-segsize", required_argument, NULL, 12},
 		{"data-checksums", no_argument, NULL, 'k'},
+		{"no-data-checksums", no_argument, NULL, 'K'},
 		{"allow-group-access", no_argument, NULL, 'g'},
 		{NULL, 0, NULL, 0}
 	};
@@ -3142,7 +3144,7 @@ main(int argc, char *argv[])
 
 	/* process command-line options */
 
-	while ((c = getopt_long(argc, argv, "dD:E:kL:nNU:WA:sST:X:g", long_options, &option_index)) != -1)
+	while ((c = getopt_long(argc, argv, "dD:E:kKL:nNU:WA:sST:X:g", long_options, &option_index)) != -1)
 	{
 		switch (c)
 		{
@@ -3194,6 +3196,9 @@ main(int argc, char *argv[])
 			case 'k':
 				data_checksums = true;
 				break;
+			case 'K':
+				data_checksums = false;
+				break;
 			case 'L':
 				share_path = pg_strdup(optarg);
 				break;
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 8dfcd8752a..2677706a80 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -8,7 +8,7 @@ use Fcntl ':mode';
 use File::stat qw{lstat};
 use PostgresNode;
 use TestLib;
-use Test::More tests => 22;
+use Test::More tests => 26;
 
 my $tempdir = TestLib::tempdir;
 my $xlogdir = "$tempdir/pgxlog";
@@ -59,20 +59,30 @@ mkdir $datadir;
 	}
 }
 
-# Control file should tell that data checksums are disabled by default.
+command_ok([ 'initdb', '-S', $datadir ], 'sync only');
+command_fails([ 'initdb', $datadir ], 'existing data directory');
+
+# Control file should tell that data checksums are enabled by default.
 command_like(['pg_controldata', $datadir],
+			 qr/Data page checksum version:.*1/,
+			 'checksums are enabled in control file');
+
+# Test with checksums disabled.
+my $datadir_nochksum = "$tempdir/data_nochksum";
+mkdir $datadir_nochksum;
+command_ok([ 'initdb', '-K', '-N', $datadir_nochksum ], 'successful creation without checksums');
+
+# Control file should tell that data checksums are disabled.
+command_like(['pg_controldata', $datadir_nochksum],
 			 qr/Data page checksum version:.*0/,
 			 'checksums are disabled in control file');
-# pg_checksums fails with checksums disabled by default.  This is
+# pg_checksums fails with checksums disabled.  This is
 # not part of the tests included in pg_checksums to save from
 # the creation of an extra instance.
 command_fails(
-	[ 'pg_checksums', '-D', $datadir],
+	[ 'pg_checksums', '-D', $datadir_nochksum],
 	"pg_checksums fails with data checksum disabled");
 
-command_ok([ 'initdb', '-S', $datadir ], 'sync only');
-command_fails([ 'initdb', $datadir ], 'existing data directory');
-
 # Check group access on PGDATA
 SKIP:
 {
-- 
2.20.1

Reply via email to