Please find attached a patch to enable data checksums by default. Currently, initdb only enables data checksums if passed the --data-checksums or -k argument. There was some hesitation years ago when this feature was first added, leading to the current situation where the default is off. However, many years later, there is wide consensus that this is an extraordinarily safe, desirable setting. Indeed, most (if not all) of the major commercial and open source Postgres systems currently turn this on by default. I posit you would be hard-pressed to find many systems these days in which it has NOT been turned on. So basically we have a de-facto standard, and I think it's time we flipped the switch to make it on by default.
The patch is simple enough: literally flipping the boolean inside of initdb.c, and adding a new argument '--no-data-checksums' for those instances that truly want the old behavior. One place that still needs the old behavior is our internal tests for pg_checksums and pg_amcheck, so I added a new argument to init() in PostgreSQL/Test/Cluster.pm to allow those to still pass their tests. This is just the default - people are still more than welcome to turn it off with the new flag. The pg_checksums program is another option that actually argues for having the default "on", as switching to "off" once initdb has been run is trivial. Yes, I am aware of the previous discussions on this, but the world moves fast - wal compression is better than in the past, vacuum is better now, and data-checksums being on is such a complete default in the wild, it feels weird and a disservice that we are not running all our tests like that. Cheers, Greg
From 12ce067f5ba64414d1d14c5f2e763d04cdfacd13 Mon Sep 17 00:00:00 2001 From: Greg Sabino Mullane <greg@turnstep.com> Date: Tue, 6 Aug 2024 18:18:56 -0400 Subject: [PATCH] Make initdb enable data checksums by default --- doc/src/sgml/ref/initdb.sgml | 4 ++- src/bin/initdb/initdb.c | 6 ++++- src/bin/initdb/t/001_initdb.pl | 31 +++++++++++++++++------ src/bin/pg_amcheck/t/003_check.pl | 2 +- src/bin/pg_amcheck/t/004_verify_heapam.pl | 2 +- src/bin/pg_checksums/t/002_actions.pl | 2 +- src/test/perl/PostgreSQL/Test/Cluster.pm | 7 +++++ 7 files changed, 41 insertions(+), 13 deletions(-) diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index bdd613e77f..511f489d34 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -267,12 +267,14 @@ PostgreSQL documentation <para> Use checksums on data pages to help detect corruption by the I/O system that would otherwise be silent. Enabling checksums - may incur a noticeable performance penalty. If set, checksums + may incur a small performance penalty. If set, checksums are calculated for all objects, in all databases. All checksum failures will be reported in the <link linkend="monitoring-pg-stat-database-view"> <structname>pg_stat_database</structname></link> view. See <xref linkend="checksums" /> for details. + As of version 18, checksums are enabled by default. They can be + disabled by use of <option>--no-data-checksums</option>. </para> </listitem> </varlistentry> diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index f00718a015..ce7d3e99e5 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -164,7 +164,7 @@ static bool noinstructions = false; static bool do_sync = true; static bool sync_only = false; static bool show_setting = false; -static bool data_checksums = false; +static bool data_checksums = true; static char *xlog_dir = NULL; static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; @@ -3121,6 +3121,7 @@ main(int argc, char *argv[]) {"waldir", required_argument, NULL, 'X'}, {"wal-segsize", required_argument, NULL, 12}, {"data-checksums", no_argument, NULL, 'k'}, + {"no-data-checksums", no_argument, NULL, 20}, {"allow-group-access", no_argument, NULL, 'g'}, {"discard-caches", no_argument, NULL, 14}, {"locale-provider", required_argument, NULL, 15}, @@ -3319,6 +3320,9 @@ main(int argc, char *argv[]) if (!parse_sync_method(optarg, &sync_method)) exit(1); break; + case 20: + data_checksums = false; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 06a35ac0b7..64395ec531 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -69,16 +69,11 @@ mkdir $datadir; } } -# Control file should tell that data checksums are disabled by default. +# Control file should tell that data checksums are enabled by default. command_like( [ 'pg_controldata', $datadir ], - qr/Data page checksum version:.*0/, - 'checksums are disabled in control file'); -# pg_checksums fails with checksums disabled by default. This is -# not part of the tests included in pg_checksums to save from -# the creation of an extra instance. -command_fails([ 'pg_checksums', '-D', $datadir ], - "pg_checksums fails with data checksum disabled"); + qr/Data page checksum version:.*1/, + 'checksums are enabled in control file'); command_ok([ 'initdb', '-S', $datadir ], 'sync only'); command_fails([ 'initdb', $datadir ], 'existing data directory'); @@ -268,4 +263,24 @@ ok($conf !~ qr/^WORK_MEM = /m, "WORK_MEM should not be configured"); ok($conf !~ qr/^Work_Mem = /m, "Work_Mem should not be configured"); ok($conf =~ qr/^work_mem = 512/m, "work_mem should be in config"); +# Test the no-data-checksums flag + +my $datadir_nochecksums = "$tempdir/data_no_checksums"; + +command_ok([ 'initdb', '--no-data-checksums', $datadir_nochecksums ], + 'successful creation without data checksums'); + +# Control file should tell that data checksums are disabled. +command_like( + [ 'pg_controldata', $datadir_nochecksums ], + qr/Data page checksum version:.*0/, + 'checksums are disabled in control file'); + +# pg_checksums fails with checksums disabled. This is +# not part of the tests included in pg_checksums to save from +# the creation of an extra instance. +command_fails( + [ 'pg_checksums', '-D', $datadir_nochecksums ], + "pg_checksums fails with data checksum disabled"); + done_testing(); diff --git a/src/bin/pg_amcheck/t/003_check.pl b/src/bin/pg_amcheck/t/003_check.pl index 4b16bda6a4..a45b14662f 100644 --- a/src/bin/pg_amcheck/t/003_check.pl +++ b/src/bin/pg_amcheck/t/003_check.pl @@ -120,7 +120,7 @@ sub perform_all_corruptions() # Test set-up $node = PostgreSQL::Test::Cluster->new('test'); -$node->init; +$node->init(no_checksums => 1); $node->append_conf('postgresql.conf', 'autovacuum=off'); $node->start; $port = $node->port; diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index f6d2c5f787..2c17f7d068 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -181,7 +181,7 @@ my $aborted_xid; # autovacuum workers visiting the table could crash the backend. # Disable autovacuum so that won't happen. my $node = PostgreSQL::Test::Cluster->new('test'); -$node->init; +$node->init(no_checksums => 1); $node->append_conf('postgresql.conf', 'autovacuum=off'); $node->append_conf('postgresql.conf', 'max_prepared_transactions=10'); diff --git a/src/bin/pg_checksums/t/002_actions.pl b/src/bin/pg_checksums/t/002_actions.pl index 33e7fb53c5..c136febbbb 100644 --- a/src/bin/pg_checksums/t/002_actions.pl +++ b/src/bin/pg_checksums/t/002_actions.pl @@ -88,7 +88,7 @@ sub check_relation_corruption # Initialize node with checksums disabled. my $node = PostgreSQL::Test::Cluster->new('node_checksum'); -$node->init(); +$node->init(no_checksums => 1); my $pgdata = $node->data_dir; # Control file should know that checksums are disabled. diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 32ee98aebc..632a2c9ebc 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -504,6 +504,8 @@ On Windows, we use SSPI authentication to ensure the same (by pg_regress WAL archiving can be enabled on this node by passing the keyword parameter has_archiving => 1. This is disabled by default. +Data checksums can be forced off by passing no_checksums => 1. + postgresql.conf can be set up for replication by passing the keyword parameter allows_streaming => 'logical' or 'physical' (passing 1 will also suffice for physical replication) depending on type of replication that @@ -530,6 +532,11 @@ sub init $params{force_initdb} = 0 unless defined $params{force_initdb}; $params{has_archiving} = 0 unless defined $params{has_archiving}; + if (defined $params{no_checksums}) + { + push @{ $params{extra} }, '--no-data-checksums'; + } + my $initdb_extra_opts_env = $ENV{PG_TEST_INITDB_EXTRA_OPTS}; if (defined $initdb_extra_opts_env) { --