On 2022/07/21 0:29, Bharath Rupireddy wrote:
How about we transform the following messages into something like below?
(errmsg("could not locate a valid checkpoint record"))); after
ReadCheckpointRecord() for control file cases to "could not locate
valid checkpoint record in control file"
(errmsg("could not locate required checkpoint record"), after
ReadCheckpointRecord() for backup_label case to "could not locate
valid checkpoint record in backup_label file"
The above messages give more meaningful and unique info to the users.
Agreed. Attached is the updated version of the patch.
Thanks for the review!
May be unrelated, IIRC, for the errors like ereport(PANIC,
(errmsg("could not locate a valid checkpoint record"))); we wanted to
add a hint asking users to consider running pg_resetwal to fix the
issue. The error for ReadCheckpointRecord() in backup_label file
cases, already gives a hint errhint("If you are restoring from a
backup, touch \"%s/recovery.signal\" ...... Adding the hint of running
pg_resetwal (of course with a caution that it can cause inconsistency
in the data and use it as a last resort as described in the docs)
helps users and support engineers a lot to mitigate the server down
cases quickly.
+1 to add some hint messages. But I'm not sure if it's good to hint the use of
pg_resetwal because, as you're saying, it should be used as a last resort and
has some big risks like data loss, corruption, etc. That is, I'm concerned
about that some users might quickly run pg_resetwal because hint message says
that, without reading the docs nor understanding those risks.
Regards,
--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
From a5e45acb753e9d93074a25a2fc409c693c46630c Mon Sep 17 00:00:00 2001
From: Fujii Masao <fu...@postgresql.org>
Date: Wed, 20 Jul 2022 23:06:44 +0900
Subject: [PATCH] Remove useless arguments in ReadCheckpointRecord().
---
src/backend/access/transam/xlogrecovery.c | 88 +++++------------------
1 file changed, 17 insertions(+), 71 deletions(-)
diff --git a/src/backend/access/transam/xlogrecovery.c
b/src/backend/access/transam/xlogrecovery.c
index 5d6f1b5e46..010f0cd7b2 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -422,8 +422,8 @@ static XLogPageReadResult
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr,
XLogRecPtr replayLSN,
bool nonblocking);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
-static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
-
int whichChkpt, bool report, TimeLineID replayTLI);
+static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
+
XLogRecPtr RecPtr, TimeLineID replayTLI);
static bool rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN);
static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
XLogSource source, bool
notfoundOk);
@@ -605,7 +605,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
* When a backup_label file is present, we want to roll forward
from
* the checkpoint it identifies, rather than using pg_control.
*/
- record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc, 0,
true,
+ record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
CheckPointTLI);
if (record != NULL)
{
@@ -638,7 +638,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
else
{
ereport(FATAL,
- (errmsg("could not locate required
checkpoint record"),
+ (errmsg("could not locate a valid
checkpoint record in backup_label file"),
errhint("If you are restoring from a
backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
"If you are not
restoring from a backup, try removing the file \"%s/backup_label\".\n"
"Be careful: removing
\"%s/backup_label\" will result in a corrupt cluster if restoring from a
backup.",
@@ -744,7 +744,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
RedoStartLSN = ControlFile->checkPointCopy.redo;
RedoStartTLI = ControlFile->checkPointCopy.ThisTimeLineID;
- record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc, 1,
true,
+ record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
CheckPointTLI);
if (record != NULL)
{
@@ -761,7 +761,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
* simplify processing around checkpoints.
*/
ereport(PANIC,
- (errmsg("could not locate a valid
checkpoint record")));
+ (errmsg("could not locate a valid
checkpoint record in control file")));
}
memcpy(&checkPoint, XLogRecGetData(xlogreader),
sizeof(CheckPoint));
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) ==
XLOG_CHECKPOINT_SHUTDOWN);
@@ -3843,13 +3843,10 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
/*
* Subroutine to try to fetch and validate a prior checkpoint record.
- *
- * whichChkpt identifies the checkpoint (merely for reporting purposes).
- * 1 for "primary", 0 for "other" (backup_label)
*/
static XLogRecord *
ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
- int whichChkpt, bool report,
TimeLineID replayTLI)
+ TimeLineID replayTLI)
{
XLogRecord *record;
uint8 info;
@@ -3858,20 +3855,8 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
if (!XRecOffIsValid(RecPtr))
{
- if (!report)
- return NULL;
-
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid primary
checkpoint link in control file")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid checkpoint
link in backup_label file")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid checkpoint location")));
return NULL;
}
@@ -3880,67 +3865,28 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
if (record == NULL)
{
- if (!report)
- return NULL;
-
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid primary
checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid checkpoint
record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid checkpoint record")));
return NULL;
}
if (record->xl_rmid != RM_XLOG_ID)
{
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid resource
manager ID in primary checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid resource
manager ID in checkpoint record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid resource manager ID in
checkpoint record")));
return NULL;
}
info = record->xl_info & ~XLR_INFO_MASK;
if (info != XLOG_CHECKPOINT_SHUTDOWN &&
info != XLOG_CHECKPOINT_ONLINE)
{
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid xl_info in
primary checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid xl_info in
checkpoint record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid xl_info in checkpoint
record")));
return NULL;
}
if (record->xl_tot_len != SizeOfXLogRecord +
SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
{
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid length of
primary checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid length of
checkpoint record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid length of checkpoint
record")));
return NULL;
}
return record;
--
2.36.0