From 80c9db24c4937e1782703c123ab67d74d8b20690 Mon Sep 17 00:00:00 2001
From: Nitin Jadhav <nitinjadhavpostgres@gmail.com>
Date: Fri, 6 Mar 2026 04:52:17 +0000
Subject: [PATCH 2/2] Add TAP tests for missing redo/checkpoint during backup
 recovery

Add two recovery TAP tests to validate PostgreSQL behavior when WAL
records required for startup are missing in the presence of a
backup_label file.

The first test covers the case where the checkpoint record referenced
by backup_label is missing, and verifies that recovery fails with a
clear FATAL error.

The second test covers the case where the redo record referenced by the
checkpoint is missing while a backup_label file is present, with redo
and checkpoint records forced into different WAL segments using
injection points.
---
 ...53_missing_checkpoint_with_backup_label.pl |  86 ++++++++++
 .../t/054_missing_redo_with_backup_label.pl   | 152 ++++++++++++++++++
 2 files changed, 238 insertions(+)
 create mode 100644 src/test/recovery/t/053_missing_checkpoint_with_backup_label.pl
 create mode 100644 src/test/recovery/t/054_missing_redo_with_backup_label.pl

diff --git a/src/test/recovery/t/053_missing_checkpoint_with_backup_label.pl b/src/test/recovery/t/053_missing_checkpoint_with_backup_label.pl
new file mode 100644
index 00000000000..7be070abfea
--- /dev/null
+++ b/src/test/recovery/t/053_missing_checkpoint_with_backup_label.pl
@@ -0,0 +1,86 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Verify crash recovery behavior when the WAL segment containing the
+# checkpoint record referenced by backup_label is missing.
+#
+# Expected behavior: startup fails with FATAL and logs a message about
+# not being able to locate the required checkpoint record.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Set up the source cluster that the base backup is taken from.
+my $primary = PostgreSQL::Test::Cluster->new('testnode');
+$primary->init(allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'wal_level = replica');
+$primary->start;
+
+# Produce some WAL, then request an explicit checkpoint.
+my $setup_sql = q{CREATE TABLE t(a int); INSERT INTO t VALUES (1),(2),(3);};
+$primary->safe_psql('postgres', $setup_sql);
+$primary->safe_psql('postgres', 'CHECKPOINT');
+
+# Take a base backup of the running cluster and use it to initialize a
+# second cluster, which is the one this test tries to start.
+# NOTE(review): "has_restored" is passed through as-is; confirm that
+# init_from_backup() recognizes this option name.
+my $backup_name = 'fs_bkp';
+$primary->backup($backup_name);
+
+my $reco = PostgreSQL::Test::Cluster->new('recovery_from_backup_ckpt');
+$reco->init_from_backup($primary, $backup_name, has_restored => 1);
+
+# The restored data directory must contain a backup_label file.
+my $backup_label = $reco->data_dir . '/backup_label';
+ok(-e $backup_label, 'backup_label exists');
+
+# Extract the WAL segment name from the first "(file <24 hex digits>)"
+# annotation in backup_label.
+# NOTE(review): assumes this segment also holds the checkpoint record.
+my $backup_label_contents = slurp_file($backup_label);
+my ($checkpoint_walfile) =
+  $backup_label_contents =~ /\(file\s+([0-9A-F]{24})\)/;
+
+# Warnings are fatal in this script, so do not interpolate a possibly
+# undefined value into the test name; stop here if nothing was found.
+my $found_walfile =
+  defined $checkpoint_walfile ? $checkpoint_walfile : '(not found)';
+ok(defined $checkpoint_walfile,
+    "extracted checkpoint WAL file from backup_label: $found_walfile")
+  or die "no WAL file name found in $backup_label";
+
+# Drop the segment named in backup_label from the restored cluster's
+# pg_wal directory, so that startup cannot read the checkpoint record.
+my $wal_dir = $reco->data_dir . '/pg_wal';
+ok(-d $wal_dir, 'pg_wal directory exists');
+
+my $segment_path = "$wal_dir/$checkpoint_walfile";
+
+# Abort right away if the segment is absent: there would be nothing to
+# remove and the rest of the test would be meaningless.
+ok(-e $segment_path, "checkpoint WAL segment exists before removal: $segment_path")
+  or die "Expected WAL segment $segment_path not found";
+unlink($segment_path) or die "unlink $segment_path failed: $!";
+
+# Try to start the restored cluster.  pg_ctl is run directly because the
+# startup is expected to fail, which is what command_fails() asserts.
+command_fails(
+    [
+        'pg_ctl',
+        '--pgdata' => $reco->data_dir,
+        '--log'    => $reco->logfile,
+        'start',
+    ],
+    'startup fails when checkpoint WAL is missing with backup_label present'
+);
+
+# The server log must report that the required checkpoint record could
+# not be located.
+my $log = slurp_file($reco->logfile);
+like($log, qr/(?:FATAL|PANIC): .*could not locate required checkpoint record/i,
+    'server log reports missing checkpoint record');
+
+done_testing();
diff --git a/src/test/recovery/t/054_missing_redo_with_backup_label.pl b/src/test/recovery/t/054_missing_redo_with_backup_label.pl
new file mode 100644
index 00000000000..f2d3352c46f
--- /dev/null
+++ b/src/test/recovery/t/054_missing_redo_with_backup_label.pl
@@ -0,0 +1,152 @@
+# Copyright (c) 2025-2026, PostgreSQL Global Development Group
+#
+# Verify recovery behavior when a WAL segment containing the redo record is
+# missing, with a checkpoint record located in a different segment, in the
+# presence of a backup_label file.
+#
+# Expected behavior: startup fails with FATAL and logs a message about not
+# being able to find the redo location referenced by the checkpoint record.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Skip unless injection points are enabled.  The variable may be unset and
+# warnings are fatal here, so fall back to an empty string before comparing.
+if (($ENV{enable_injection_points} || '') ne 'yes')
+{
+    plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('testnode');
+$node->init;
+$node->append_conf('postgresql.conf', 'log_checkpoints = on');
+$node->start;
+
+# The injection_points extension must be installed as well.
+plan skip_all => 'Extension injection_points not installed'
+  if !$node->check_extension('injection_points');
+$node->safe_psql('postgres', q(CREATE EXTENSION injection_points));
+
+# Two wait-based injection points are attached here, not one.  This may
+# look strange, but it works as a workaround to make all memory
+# allocations happen outside the critical section of the checkpoint
+# required for this test.
+# First, "create-checkpoint-initial" runs outside the critical section
+# and initializes the shared memory required for the wait machinery with
+# its DSM registry.  Then, "create-checkpoint-run" is loaded outside the
+# critical section, so that any memory needed by the library load is
+# allocated there, while its callback runs inside the critical section.
+foreach my $point_name ('create-checkpoint-initial', 'create-checkpoint-run')
+{
+    $node->safe_psql('postgres',
+        "SELECT injection_points_attach('$point_name', 'wait')");
+}
+
+# Run CHECKPOINT from a background psql session.  The session echoes a
+# marker first, which is the pattern handed to query_until(); the
+# checkpoint then stops on the first injection point just after it starts.
+my $ckpt_session = $node->background_psql('postgres');
+my $ckpt_script = q(\echo starting_checkpoint
+checkpoint;
+);
+$ckpt_session->query_until(qr/starting_checkpoint/, $ckpt_script);
+
+# Once the checkpointer waits on the initial point, still outside its
+# critical section, wake it up so that it proceeds to the second point.
+my $wakeup_initial =
+  q{SELECT injection_points_wakeup('create-checkpoint-initial')};
+$node->wait_for_event('checkpointer', 'create-checkpoint-initial');
+$node->safe_psql('postgres', $wakeup_initial);
+
+# The checkpointer now stops at the second point: the checkpoint is in
+# progress and its redo record has already been logged.
+$node->wait_for_event('checkpointer', 'create-checkpoint-run');
+
+# Force a segment switch here, so that the checkpoint record ends up in a
+# different WAL segment than the redo record.
+$node->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Let the checkpoint finish.  The current log size is passed to
+# wait_for_log() as the offset, presumably so only later output is matched.
+my $wakeup_run = q{SELECT injection_points_wakeup('create-checkpoint-run')};
+my $log_start = -s $node->logfile;
+$node->safe_psql('postgres', $wakeup_run);
+$node->wait_for_log(qr/checkpoint complete/, $log_start);
+
+# The background session is no longer needed.
+$ckpt_session->quit;
+
+# Fetch the redo and checkpoint LSNs, and the WAL segment holding each.
+my $redo_lsn = $node->safe_psql('postgres',
+    q{SELECT redo_lsn FROM pg_control_checkpoint()});
+my $redo_walfile_name =
+  $node->safe_psql('postgres', "SELECT pg_walfile_name('$redo_lsn')");
+my $checkpoint_lsn = $node->safe_psql('postgres',
+    q{SELECT checkpoint_lsn FROM pg_control_checkpoint()});
+my $checkpoint_walfile_name =
+  $node->safe_psql('postgres', "SELECT pg_walfile_name('$checkpoint_lsn')");
+
+# The test is only meaningful if the two records are in separate segments.
+isnt($redo_walfile_name, $checkpoint_walfile_name,
+    'redo and checkpoint records on different segments');
+
+# Stop the server and take a cold file-system copy of its data directory.
+$node->stop('immediate');
+my $backupname = 'cold_bkp';
+$node->backup_fs_cold($backupname);
+
+# Restore the cold copy into a new node.
+my $reco = PostgreSQL::Test::Cluster->new('reco_with_backup_label');
+$reco->init_from_backup($node, $backupname, has_restored => 1);
+
+# Hand-write a backup_label in the restored cluster to force backup
+# recovery.  The timeline is the first 8 hex digits of the WAL file name.
+my $backup_label_path = $reco->data_dir . '/backup_label';
+my $tli = hex(substr($checkpoint_walfile_name, 0, 8));
+my $label_text = join('',
+    "START WAL LOCATION: $redo_lsn (file $redo_walfile_name)\n",
+    "CHECKPOINT LOCATION: $checkpoint_lsn\n",
+    "BACKUP METHOD: test\n",
+    "BACKUP FROM: primary\n",
+    "START TIMELINE: $tli\n",
+    "CHECKPOINT TIMELINE: $tli\n",
+    "LABEL: redo missing with backup_label\n");
+
+# Check every step: a write error may only be reported by close().
+open(my $bl, '>', $backup_label_path)
+  or die "could not create backup_label: $!";
+print {$bl} $label_text or die "could not write backup_label: $!";
+close($bl) or die "could not close backup_label: $!";
+ok(-e $backup_label_path, 'backup_label exists before startup');
+
+# Both segments are expected in the restored pg_wal directory.  Only the
+# one holding the redo record is deleted; the checkpoint's stays in place.
+my $wal_dir = $reco->data_dir . '/pg_wal';
+my $ckpt_segment = "$wal_dir/$checkpoint_walfile_name";
+my $redo_segment = "$wal_dir/$redo_walfile_name";
+
+ok(-e $ckpt_segment, "checkpoint WAL segment exists: $ckpt_segment");
+ok(-e $redo_segment, "redo WAL segment exists before removal: $redo_segment")
+  or die "Expected WAL segment $redo_segment not found";
+unlink($redo_segment) or die "could not remove redo WAL file: $!";
+
+# Use run_log instead of node->start because this test expects that
+# the server ends with an error during recovery.
+run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $reco->data_dir,
+		'--log'    => $reco->logfile,
+		'start',
+	]);
+
+# Confirm that recovery has failed, as expected, by checking the server log.
+my $logfile = slurp_file($reco->logfile());
+like($logfile,
+	qr/FATAL: .* could not find redo location .* referenced by checkpoint record at .*/,
+	"ends with FATAL because it could not find redo location");
+
+done_testing();
-- 
2.43.0

