Please find attached a patch to enable data checksums by default.

Currently, initdb only enables data checksums if passed the
--data-checksums or -k argument. There was some hesitation years ago when
this feature was first added, leading to the current situation where the
default is off. However, many years later, there is wide consensus that
this is an extraordinarily safe, desirable setting. Indeed, most (if not
all) of the major commercial and open source Postgres systems currently
turn this on by default. I posit you would be hard-pressed to find many
systems these days in which it has NOT been turned on. So basically we have
a de-facto standard, and I think it's time we flipped the switch to make it
on by default.

The patch is simple enough: literally flipping the boolean inside of
initdb.c, and adding a new argument '--no-data-checksums' for those
instances that truly want the old behavior. One place that still needs the
old behavior is our internal tests for pg_checksums and pg_amcheck, so I
added a new argument to init() in PostgreSQL/Test/Cluster.pm to allow those
to still pass their tests.

This is just the default - people are still more than welcome to turn it
off with the new flag. The pg_checksums program is another option that
actually argues for having the default "on", as switching to "off" once
initdb has been run is trivial.

Yes, I am aware of the previous discussions on this, but the world moves
fast - wal compression is better than in the past, vacuum is better now,
and data-checksums being on is such a complete default in the wild, it
feels weird and a disservice that we are not running all our tests like
that.

Cheers,
Greg
From 12ce067f5ba64414d1d14c5f2e763d04cdfacd13 Mon Sep 17 00:00:00 2001
From: Greg Sabino Mullane <greg@turnstep.com>
Date: Tue, 6 Aug 2024 18:18:56 -0400
Subject: [PATCH] Make initdb enable data checksums by default

---
 doc/src/sgml/ref/initdb.sgml              |  4 ++-
 src/bin/initdb/initdb.c                   |  6 ++++-
 src/bin/initdb/t/001_initdb.pl            | 31 +++++++++++++++++------
 src/bin/pg_amcheck/t/003_check.pl         |  2 +-
 src/bin/pg_amcheck/t/004_verify_heapam.pl |  2 +-
 src/bin/pg_checksums/t/002_actions.pl     |  2 +-
 src/test/perl/PostgreSQL/Test/Cluster.pm  |  7 +++++
 7 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index bdd613e77f..511f489d34 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -267,12 +267,14 @@ PostgreSQL documentation
        <para>
         Use checksums on data pages to help detect corruption by the
         I/O system that would otherwise be silent. Enabling checksums
-        may incur a noticeable performance penalty. If set, checksums
+        may incur a small performance penalty. If set, checksums
         are calculated for all objects, in all databases. All checksum
         failures will be reported in the
         <link linkend="monitoring-pg-stat-database-view">
         <structname>pg_stat_database</structname></link> view.
         See <xref linkend="checksums" /> for details.
+        As of version 18, checksums are enabled by default. They can be
+        disabled by use of <option>--no-data-checksums</option>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index f00718a015..ce7d3e99e5 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -164,7 +164,7 @@ static bool noinstructions = false;
 static bool do_sync = true;
 static bool sync_only = false;
 static bool show_setting = false;
-static bool data_checksums = false;
+static bool data_checksums = true;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
 static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
@@ -3121,6 +3121,7 @@ main(int argc, char *argv[])
 		{"waldir", required_argument, NULL, 'X'},
 		{"wal-segsize", required_argument, NULL, 12},
 		{"data-checksums", no_argument, NULL, 'k'},
+		{"no-data-checksums", no_argument, NULL, 20},
 		{"allow-group-access", no_argument, NULL, 'g'},
 		{"discard-caches", no_argument, NULL, 14},
 		{"locale-provider", required_argument, NULL, 15},
@@ -3319,6 +3320,9 @@ main(int argc, char *argv[])
 				if (!parse_sync_method(optarg, &sync_method))
 					exit(1);
 				break;
+			case 20:
+				data_checksums = false;
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 06a35ac0b7..64395ec531 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -69,16 +69,11 @@ mkdir $datadir;
 	}
 }
 
-# Control file should tell that data checksums are disabled by default.
+# Control file should tell that data checksums are enabled by default.
 command_like(
 	[ 'pg_controldata', $datadir ],
-	qr/Data page checksum version:.*0/,
-	'checksums are disabled in control file');
-# pg_checksums fails with checksums disabled by default.  This is
-# not part of the tests included in pg_checksums to save from
-# the creation of an extra instance.
-command_fails([ 'pg_checksums', '-D', $datadir ],
-	"pg_checksums fails with data checksum disabled");
+	qr/Data page checksum version:.*1/,
+	'checksums are enabled in control file');
 
 command_ok([ 'initdb', '-S', $datadir ], 'sync only');
 command_fails([ 'initdb', $datadir ], 'existing data directory');
@@ -268,4 +263,24 @@ ok($conf !~ qr/^WORK_MEM = /m, "WORK_MEM should not be configured");
 ok($conf !~ qr/^Work_Mem = /m, "Work_Mem should not be configured");
 ok($conf =~ qr/^work_mem = 512/m, "work_mem should be in config");
 
+# Test the no-data-checksums flag
+
+my $datadir_nochecksums = "$tempdir/data_no_checksums";
+
+command_ok([ 'initdb', '--no-data-checksums', $datadir_nochecksums ],
+	'successful creation without data checksums');
+
+# Control file should tell that data checksums are disabled.
+command_like(
+	[ 'pg_controldata', $datadir_nochecksums ],
+	qr/Data page checksum version:.*0/,
+	'checksums are disabled in control file');
+
+# pg_checksums fails with checksums disabled. This is
+# not part of the tests included in pg_checksums to save from
+# the creation of an extra instance.
+command_fails(
+	[ 'pg_checksums', '-D', $datadir_nochecksums ],
+	"pg_checksums fails with data checksum disabled");
+
 done_testing();
diff --git a/src/bin/pg_amcheck/t/003_check.pl b/src/bin/pg_amcheck/t/003_check.pl
index 4b16bda6a4..a45b14662f 100644
--- a/src/bin/pg_amcheck/t/003_check.pl
+++ b/src/bin/pg_amcheck/t/003_check.pl
@@ -120,7 +120,7 @@ sub perform_all_corruptions()
 
 # Test set-up
 $node = PostgreSQL::Test::Cluster->new('test');
-$node->init;
+$node->init(no_checksums => 1);
 $node->append_conf('postgresql.conf', 'autovacuum=off');
 $node->start;
 $port = $node->port;
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl
index f6d2c5f787..2c17f7d068 100644
--- a/src/bin/pg_amcheck/t/004_verify_heapam.pl
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -181,7 +181,7 @@ my $aborted_xid;
 # autovacuum workers visiting the table could crash the backend.
 # Disable autovacuum so that won't happen.
 my $node = PostgreSQL::Test::Cluster->new('test');
-$node->init;
+$node->init(no_checksums => 1);
 $node->append_conf('postgresql.conf', 'autovacuum=off');
 $node->append_conf('postgresql.conf', 'max_prepared_transactions=10');
 
diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl
index 33e7fb53c5..c136febbbb 100644
--- a/src/bin/pg_checksums/t/002_actions.pl
+++ b/src/bin/pg_checksums/t/002_actions.pl
@@ -88,7 +88,7 @@ sub check_relation_corruption
 
 # Initialize node with checksums disabled.
 my $node = PostgreSQL::Test::Cluster->new('node_checksum');
-$node->init();
+$node->init(no_checksums => 1);
 my $pgdata = $node->data_dir;
 
 # Control file should know that checksums are disabled.
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 32ee98aebc..632a2c9ebc 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -504,6 +504,8 @@ On Windows, we use SSPI authentication to ensure the same (by pg_regress
 WAL archiving can be enabled on this node by passing the keyword parameter
 has_archiving => 1. This is disabled by default.
 
+Data checksums can be forced off by passing no_checksums => 1.
+
 postgresql.conf can be set up for replication by passing the keyword
 parameter allows_streaming => 'logical' or 'physical' (passing 1 will also
 suffice for physical replication) depending on type of replication that
@@ -530,6 +532,11 @@ sub init
 	$params{force_initdb} = 0 unless defined $params{force_initdb};
 	$params{has_archiving} = 0 unless defined $params{has_archiving};
 
+	if (defined $params{no_checksums})
+	{
+		push @{ $params{extra} }, '--no-data-checksums';
+	}
+
 	my $initdb_extra_opts_env = $ENV{PG_TEST_INITDB_EXTRA_OPTS};
 	if (defined $initdb_extra_opts_env)
 	{
-- 

Reply via email to