On 2022/07/21 14:54, Kyotaro Horiguchi wrote:
I agree to removing the two parameters. And agree to let
ReadCheckpointRecord not conscious of the location source.
Thanks for the review!
At Thu, 21 Jul 2022 11:45:23 +0900, Fujii Masao <masao.fu...@oss.nttdata.com>
wrote in
Agreed. Attached is the updated version of the patch.
Thanks for the review!
- (errmsg("could not locate required checkpoint record"),
+ (errmsg("could not locate a valid checkpoint record in backup_label
file"),
"in backup_label" there looks *to me* need some verb..
Sorry, I failed to understand your point. Could you clarify your point?
By the way,
this looks like a good chance to remove the (now) extra parens around
errmsg() and friends.
For example:
- (errmsg("could not locate a valid checkpoint record in backup_label
file"),
+ errmsg("could not locate checkpoint record spcified in backup_label
file"),
- (errmsg("could not locate a valid checkpoint record in control file")));
+ errmsg("could not locate checkpoint record recorded in control file")));
Because it's recommended not to put parenthesis just before errmsg(), you mean?
I'm ok to remove such parenthesis, but I'd like understand why before doing
that. I was thinking that either having or not having parenthesis in front of
errmsg() is ok, so there are many calls to errmsg() with parenthesis, in
xlogrecovery.c.
+ (errmsg("invalid checkpoint record")));
Is it useful to show the specified LSN there?
Yes, LSN info would be helpful also for debugging.
I separated the patch into two; one is to remove useless arguments in
ReadCheckpointRecord(), another is to improve log messages. I added LSN info in
log messages in the second patch.
+ (errmsg("invalid resource manager ID in checkpoint
record")));
We have a similar message "invalid resource manager ID %u at
%X/%X". Since the caller explains that it is a checkpoint record, we
can share the message here.
+1
+ (errmsg("invalid xl_info in checkpoint
record")));
(It is not an issue of this patch, though) I don't think this is
appropriate for user-facing message. Counldn't we say "unexpected
record type: %x" or something like?
The proposed log message doesn't look like an improvement. But I have no better
one. So I left the message as it is, in the patch, for now.
+ (errmsg("invalid length of checkpoint
record")));
We have "record with invalid length at %X/%X" or "invalid record
length at %X/%X: wanted %u, got %u". Counld we reuse any of them?
+1
Regards,
--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
From 1a7f5b66fed158306ed802cb08b328c2f8d47f20 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fu...@postgresql.org>
Date: Wed, 20 Jul 2022 23:06:44 +0900
Subject: [PATCH 1/2] Remove useless arguments in ReadCheckpointRecord().
---
src/backend/access/transam/xlogrecovery.c | 84 ++++-------------------
1 file changed, 15 insertions(+), 69 deletions(-)
diff --git a/src/backend/access/transam/xlogrecovery.c
b/src/backend/access/transam/xlogrecovery.c
index 5d6f1b5e46..e383c2123a 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -422,8 +422,8 @@ static XLogPageReadResult
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr,
XLogRecPtr replayLSN,
bool nonblocking);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
-static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
-
int whichChkpt, bool report, TimeLineID replayTLI);
+static XLogRecord *ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
+
XLogRecPtr RecPtr, TimeLineID replayTLI);
static bool rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN);
static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
XLogSource source, bool
notfoundOk);
@@ -605,7 +605,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
* When a backup_label file is present, we want to roll forward
from
* the checkpoint it identifies, rather than using pg_control.
*/
- record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc, 0,
true,
+ record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
CheckPointTLI);
if (record != NULL)
{
@@ -744,7 +744,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
RedoStartLSN = ControlFile->checkPointCopy.redo;
RedoStartTLI = ControlFile->checkPointCopy.ThisTimeLineID;
- record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc, 1,
true,
+ record = ReadCheckpointRecord(xlogprefetcher, CheckPointLoc,
CheckPointTLI);
if (record != NULL)
{
@@ -3843,13 +3843,10 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
/*
* Subroutine to try to fetch and validate a prior checkpoint record.
- *
- * whichChkpt identifies the checkpoint (merely for reporting purposes).
- * 1 for "primary", 0 for "other" (backup_label)
*/
static XLogRecord *
ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher, XLogRecPtr RecPtr,
- int whichChkpt, bool report,
TimeLineID replayTLI)
+ TimeLineID replayTLI)
{
XLogRecord *record;
uint8 info;
@@ -3858,20 +3855,8 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
if (!XRecOffIsValid(RecPtr))
{
- if (!report)
- return NULL;
-
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid primary
checkpoint link in control file")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid checkpoint
link in backup_label file")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid checkpoint location")));
return NULL;
}
@@ -3880,67 +3865,28 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
if (record == NULL)
{
- if (!report)
- return NULL;
-
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid primary
checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid checkpoint
record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid checkpoint record")));
return NULL;
}
if (record->xl_rmid != RM_XLOG_ID)
{
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid resource
manager ID in primary checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid resource
manager ID in checkpoint record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid resource manager ID in
checkpoint record")));
return NULL;
}
info = record->xl_info & ~XLR_INFO_MASK;
if (info != XLOG_CHECKPOINT_SHUTDOWN &&
info != XLOG_CHECKPOINT_ONLINE)
{
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid xl_info in
primary checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid xl_info in
checkpoint record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid xl_info in checkpoint
record")));
return NULL;
}
if (record->xl_tot_len != SizeOfXLogRecord +
SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
{
- switch (whichChkpt)
- {
- case 1:
- ereport(LOG,
- (errmsg("invalid length of
primary checkpoint record")));
- break;
- default:
- ereport(LOG,
- (errmsg("invalid length of
checkpoint record")));
- break;
- }
+ ereport(LOG,
+ (errmsg("invalid length of checkpoint
record")));
return NULL;
}
return record;
--
2.36.0
From 4358d1585121ad5685c08f19516b49d4e3045640 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fu...@postgresql.org>
Date: Fri, 22 Jul 2022 10:05:33 +0900
Subject: [PATCH 2/2] Improve log messages when invalid checkpoint record was
found.
---
src/backend/access/transam/xlogrecovery.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/src/backend/access/transam/xlogrecovery.c
b/src/backend/access/transam/xlogrecovery.c
index e383c2123a..07bb1f03b8 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -638,7 +638,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
else
{
ereport(FATAL,
- (errmsg("could not locate required
checkpoint record"),
+ (errmsg("could not locate a valid
checkpoint record in backup_label file"),
errhint("If you are restoring from a
backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
"If you are not
restoring from a backup, try removing the file \"%s/backup_label\".\n"
"Be careful: removing
\"%s/backup_label\" will result in a corrupt cluster if restoring from a
backup.",
@@ -761,7 +761,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool
*wasShutdown_ptr,
* simplify processing around checkpoints.
*/
ereport(PANIC,
- (errmsg("could not locate a valid
checkpoint record")));
+ (errmsg("could not locate a valid
checkpoint record in control file")));
}
memcpy(&checkPoint, XLogRecGetData(xlogreader),
sizeof(CheckPoint));
wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) ==
XLOG_CHECKPOINT_SHUTDOWN);
@@ -3856,7 +3856,8 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
if (!XRecOffIsValid(RecPtr))
{
ereport(LOG,
- (errmsg("invalid checkpoint location")));
+ (errmsg("invalid checkpoint location: %X/%X",
+ LSN_FORMAT_ARGS(RecPtr))));
return NULL;
}
@@ -3866,13 +3867,15 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
if (record == NULL)
{
ereport(LOG,
- (errmsg("invalid checkpoint record")));
+ (errmsg("invalid checkpoint record at %X/%X",
+ LSN_FORMAT_ARGS(RecPtr))));
return NULL;
}
if (record->xl_rmid != RM_XLOG_ID)
{
ereport(LOG,
- (errmsg("invalid resource manager ID in
checkpoint record")));
+ (errmsg("invalid resource manager ID %u in
checkpoint record at %X/%X",
+ record->xl_rmid,
LSN_FORMAT_ARGS(RecPtr))));
return NULL;
}
info = record->xl_info & ~XLR_INFO_MASK;
@@ -3880,13 +3883,16 @@ ReadCheckpointRecord(XLogPrefetcher *xlogprefetcher,
XLogRecPtr RecPtr,
info != XLOG_CHECKPOINT_ONLINE)
{
ereport(LOG,
- (errmsg("invalid xl_info in checkpoint
record")));
+ (errmsg("invalid xl_info in checkpoint record
at %X/%X",
+ LSN_FORMAT_ARGS(RecPtr))));
return NULL;
}
- if (record->xl_tot_len != SizeOfXLogRecord +
SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
+ if (record->xl_tot_len != SizeOfXLogRecord +
+ SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
{
ereport(LOG,
- (errmsg("invalid length of checkpoint
record")));
+ (errmsg("checkpoint record with invalid length
at %X/%X",
+ LSN_FORMAT_ARGS(RecPtr))));
return NULL;
}
return record;
--
2.36.0