Hi, On 2021-09-03 20:01:50 -0400, Alvaro Herrera wrote: > From 6abc5026f92b99d704bff527d1306eb8588635e9 Mon Sep 17 00:00:00 2001 > From: Alvaro Herrera <alvhe...@alvh.no-ip.org> > Date: Tue, 31 Aug 2021 20:55:10 -0400 > Subject: [PATCH v3 1/5] Revert "Avoid creating archive status ".ready" files > too early"
> This reverts commit 515e3d84a0b58b58eb30194209d2bc47ed349f5b. I'd prefer to see this pushed soon. I've a bunch of patches to xlog.c that conflict with the prior changes, and rebasing back and forth isn't that much fun... > From f767cdddb3120f1f6c079c8eb00eaff38ea98c79 Mon Sep 17 00:00:00 2001 > From: Alvaro Herrera <alvhe...@alvh.no-ip.org> > Date: Thu, 2 Sep 2021 17:21:46 -0400 > Subject: [PATCH v3 2/5] Implement FIRST_IS_ABORTED_CONTRECORD > > --- > src/backend/access/transam/xlog.c | 53 +++++++++++++++++++++++-- > src/backend/access/transam/xlogreader.c | 39 +++++++++++++++++- > src/include/access/xlog_internal.h | 14 ++++++- > src/include/access/xlogreader.h | 3 ++ > 4 files changed, 103 insertions(+), 6 deletions(-) > > diff --git a/src/backend/access/transam/xlog.c > b/src/backend/access/transam/xlog.c > index e51a7a749d..411f1618df 100644 > --- a/src/backend/access/transam/xlog.c > +++ b/src/backend/access/transam/xlog.c > @@ -586,6 +586,8 @@ typedef struct XLogCtlData > XLogRecPtr replicationSlotMinLSN; /* oldest LSN needed by any > slot */ > > XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG > segment */ > + XLogRecPtr abortedContrecordPtr; /* LSN of incomplete record at > end of > + * > WAL */ > > /* Fake LSN counter, for unlogged relations. Protected by ulsn_lck. */ > XLogRecPtr unloggedLSN; > @@ -848,6 +850,7 @@ static XLogSource XLogReceiptSource = XLOG_FROM_ANY; > /* State information for XLOG reading */ > static XLogRecPtr ReadRecPtr; /* start of last record read */ > static XLogRecPtr EndRecPtr; /* end+1 of last record read */ > +static XLogRecPtr abortedContrecordPtr; /* end+1 of incomplete record */ > > /* > * Local copies of equivalent fields in the control file. When running > @@ -2246,6 +2249,30 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool > opportunistic) > if (!Insert->forcePageWrites) > NewPage->xlp_info |= XLP_BKP_REMOVABLE; > > + /* > + * If the last page ended with an aborted partial continuation > record, > + * mark the new page to indicate that the partial record can be > + * omitted. > + * > + * This happens only once at the end of recovery, so there's no > race > + * condition here. > + */ > + if (XLogCtl->abortedContrecordPtr >= NewPageBeginPtr) > + { Can we move this case out of AdvanceXLInsertBuffer()? As the comment says, this only happens at the end of recovery, so putting it into AdvanceXLInsertBuffer() doesn't really seem necessary? > +#ifdef WAL_DEBUG > + if (XLogCtl->abortedContrecordPtr != NewPageBeginPtr) > + elog(PANIC, "inconsistent aborted contrecord > location %X/%X, expected %X/%X", > + > LSN_FORMAT_ARGS(XLogCtl->abortedContrecordPtr), > + LSN_FORMAT_ARGS(NewPageBeginPtr)); > + ereport(LOG, > + (errmsg_internal("setting > XLP_FIRST_IS_ABORTED_PARTIAL flag at %X/%X", > + > LSN_FORMAT_ARGS(NewPageBeginPtr)))); > +#endif > + NewPage->xlp_info |= XLP_FIRST_IS_ABORTED_PARTIAL; > + > + XLogCtl->abortedContrecordPtr = InvalidXLogRecPtr; > + } > /* > * If first page of an XLOG segment file, make it a long header. > */ > @@ -4392,6 +4419,7 @@ ReadRecord(XLogReaderState *xlogreader, int emode, > record = XLogReadRecord(xlogreader, &errormsg); > ReadRecPtr = xlogreader->ReadRecPtr; > EndRecPtr = xlogreader->EndRecPtr; > + abortedContrecordPtr = xlogreader->abortedContrecordPtr; > if (record == NULL) > { > if (readFile >= 0) > @@ -7691,10 +7719,29 @@ StartupXLOG(void) > /* > * Re-fetch the last valid or last applied record, so we can identify > the > * exact endpoint of what we consider the valid portion of WAL. > + * > + * When recovery ended in an incomplete record, continue writing from > the > + * point where it went missing. This leaves behind an initial part of > + * broken record, which rescues downstream which have already received > + * that first part. > */ > - XLogBeginRead(xlogreader, LastRec); > - record = ReadRecord(xlogreader, PANIC, false); > - EndOfLog = EndRecPtr; > + if (XLogRecPtrIsInvalid(abortedContrecordPtr)) > + { > + XLogBeginRead(xlogreader, LastRec); > + record = ReadRecord(xlogreader, PANIC, false); > + EndOfLog = EndRecPtr; > + } > + else > + { > +#ifdef WAL_DEBUG > + ereport(LOG, > + (errmsg_internal("recovery overwriting broken > contrecord at %X/%X (EndRecPtr: %X/%X)", > + > LSN_FORMAT_ARGS(abortedContrecordPtr), > + > LSN_FORMAT_ARGS(EndRecPtr)))); > +#endif "broken" sounds a bit off. But then, it's just WAL_DEBUG. Which made me realize, isn't this missing a if (XLOG_DEBUG)? > @@ -442,14 +448,28 @@ XLogReadRecord(XLogReaderState *state, char **errormsg) > readOff = ReadPageInternal(state, targetPagePtr, > > Min(total_len - gotlen + SizeOfXLogShortPHD, > > XLOG_BLCKSZ)); > - > if (readOff < 0) > goto err; > > Assert(SizeOfXLogShortPHD <= readOff); > > - /* Check that the continuation on next page looks valid > */ > pageHeader = (XLogPageHeader) state->readBuf; > + > + /* > + * If we were expecting a continuation record and got > an "aborted > + * partial" flag, that means the continuation record > was lost. > + * Ignore the record we were reading, since we now know > it's broken > + * and lost forever, and restart the read by assuming > the address > + * to read is the location where we found this flag. > + */ > + if (pageHeader->xlp_info & XLP_FIRST_IS_ABORTED_PARTIAL) > + { > + ResetDecoder(state); > + RecPtr = targetPagePtr; > + goto restart; > + } I think we need to add more validation to this path. What I was proposing earlier is that we add a new special type of record at the start of an XLP_FIRST_IS_ABORTED_PARTIAL page, which contains a) lsn of the record we're aborting, b) checksum of the data up to this point. Greetings, Andres Freund