Hi,

On 2021-09-03 20:01:50 -0400, Alvaro Herrera wrote:
> From 6abc5026f92b99d704bff527d1306eb8588635e9 Mon Sep 17 00:00:00 2001
> From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
> Date: Tue, 31 Aug 2021 20:55:10 -0400
> Subject: [PATCH v3 1/5] Revert "Avoid creating archive status ".ready" files
>  too early"

> This reverts commit 515e3d84a0b58b58eb30194209d2bc47ed349f5b.

I'd prefer to see this pushed soon. I've a bunch of patches to xlog.c that
conflict with the prior changes, and rebasing back and forth isn't that much
fun...



> From f767cdddb3120f1f6c079c8eb00eaff38ea98c79 Mon Sep 17 00:00:00 2001
> From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
> Date: Thu, 2 Sep 2021 17:21:46 -0400
> Subject: [PATCH v3 2/5] Implement FIRST_IS_ABORTED_CONTRECORD
>
> ---
>  src/backend/access/transam/xlog.c       | 53 +++++++++++++++++++++++--
>  src/backend/access/transam/xlogreader.c | 39 +++++++++++++++++-
>  src/include/access/xlog_internal.h      | 14 ++++++-
>  src/include/access/xlogreader.h         |  3 ++
>  4 files changed, 103 insertions(+), 6 deletions(-)
>
> diff --git a/src/backend/access/transam/xlog.c 
> b/src/backend/access/transam/xlog.c
> index e51a7a749d..411f1618df 100644
> --- a/src/backend/access/transam/xlog.c
> +++ b/src/backend/access/transam/xlog.c
> @@ -586,6 +586,8 @@ typedef struct XLogCtlData
>       XLogRecPtr      replicationSlotMinLSN;  /* oldest LSN needed by any 
> slot */
>
>       XLogSegNo       lastRemovedSegNo;       /* latest removed/recycled XLOG 
> segment */
> +     XLogRecPtr      abortedContrecordPtr; /* LSN of incomplete record at 
> end of
> +                                                                        * 
> WAL */
>
>       /* Fake LSN counter, for unlogged relations. Protected by ulsn_lck. */
>       XLogRecPtr      unloggedLSN;
> @@ -848,6 +850,7 @@ static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
>  /* State information for XLOG reading */
>  static XLogRecPtr ReadRecPtr;        /* start of last record read */
>  static XLogRecPtr EndRecPtr; /* end+1 of last record read */
> +static XLogRecPtr abortedContrecordPtr;      /* end+1 of incomplete record */
>
>  /*
>   * Local copies of equivalent fields in the control file.  When running
> @@ -2246,6 +2249,30 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool 
> opportunistic)
>               if (!Insert->forcePageWrites)
>                       NewPage->xlp_info |= XLP_BKP_REMOVABLE;
>
> +             /*
> +              * If the last page ended with an aborted partial continuation 
> record,
> +              * mark the new page to indicate that the partial record can be
> +              * omitted.
> +              *
> +              * This happens only once at the end of recovery, so there's no 
> race
> +              * condition here.
> +              */
> +             if (XLogCtl->abortedContrecordPtr >= NewPageBeginPtr)
> +             {

Can we move this case out of AdvanceXLInsertBuffer()? As the comment says,
this only happens at the end of recovery, so putting it into
AdvanceXLInsertBuffer() doesn't really seem necessary?


> +#ifdef WAL_DEBUG
> +                     if (XLogCtl->abortedContrecordPtr != NewPageBeginPtr)
> +                             elog(PANIC, "inconsistent aborted contrecord 
> location %X/%X, expected %X/%X",
> +                                      
> LSN_FORMAT_ARGS(XLogCtl->abortedContrecordPtr),
> +                                      LSN_FORMAT_ARGS(NewPageBeginPtr));
> +                     ereport(LOG,
> +                                     (errmsg_internal("setting 
> XLP_FIRST_IS_ABORTED_PARTIAL flag at %X/%X",
> +                                                                      
> LSN_FORMAT_ARGS(NewPageBeginPtr))));
> +#endif
> +                     NewPage->xlp_info |= XLP_FIRST_IS_ABORTED_PARTIAL;
> +
> +                     XLogCtl->abortedContrecordPtr = InvalidXLogRecPtr;
> +             }

>               /*
>                * If first page of an XLOG segment file, make it a long header.
>                */
> @@ -4392,6 +4419,7 @@ ReadRecord(XLogReaderState *xlogreader, int emode,
>               record = XLogReadRecord(xlogreader, &errormsg);
>               ReadRecPtr = xlogreader->ReadRecPtr;
>               EndRecPtr = xlogreader->EndRecPtr;
> +             abortedContrecordPtr = xlogreader->abortedContrecordPtr;
>               if (record == NULL)
>               {
>                       if (readFile >= 0)
> @@ -7691,10 +7719,29 @@ StartupXLOG(void)
>       /*
>        * Re-fetch the last valid or last applied record, so we can identify 
> the
>        * exact endpoint of what we consider the valid portion of WAL.
> +      *
> +      * When recovery ended in an incomplete record, continue writing from 
> the
> +      * point where it went missing.  This leaves behind an initial part of
> +      * broken record, which rescues downstream which have already received
> +      * that first part.
>        */
> -     XLogBeginRead(xlogreader, LastRec);
> -     record = ReadRecord(xlogreader, PANIC, false);
> -     EndOfLog = EndRecPtr;
> +     if (XLogRecPtrIsInvalid(abortedContrecordPtr))
> +     {
> +             XLogBeginRead(xlogreader, LastRec);
> +             record = ReadRecord(xlogreader, PANIC, false);
> +             EndOfLog = EndRecPtr;
> +     }
> +     else
> +     {
> +#ifdef WAL_DEBUG
> +             ereport(LOG,
> +                             (errmsg_internal("recovery overwriting broken 
> contrecord at %X/%X (EndRecPtr: %X/%X)",
> +                                                              
> LSN_FORMAT_ARGS(abortedContrecordPtr),
> +                                                              
> LSN_FORMAT_ARGS(EndRecPtr))));
> +#endif

"broken" sounds a bit off. But then, it's just WAL_DEBUG. Which made me
realize, isn't this missing a
if (XLOG_DEBUG)?



> @@ -442,14 +448,28 @@ XLogReadRecord(XLogReaderState *state, char **errormsg)
>                       readOff = ReadPageInternal(state, targetPagePtr,
>                                                                          
> Min(total_len - gotlen + SizeOfXLogShortPHD,
>                                                                               
>    XLOG_BLCKSZ));
> -
>                       if (readOff < 0)
>                               goto err;
>
>                       Assert(SizeOfXLogShortPHD <= readOff);
>
> -                     /* Check that the continuation on next page looks valid 
> */
>                       pageHeader = (XLogPageHeader) state->readBuf;
> +
> +                     /*
> +                      * If we were expecting a continuation record and got 
> an "aborted
> +                      * partial" flag, that means the continuation record 
> was lost.
> +                      * Ignore the record we were reading, since we now know 
> it's broken
> +                      * and lost forever, and restart the read by assuming 
> the address
> +                      * to read is the location where we found this flag.
> +                      */
> +                     if (pageHeader->xlp_info & XLP_FIRST_IS_ABORTED_PARTIAL)
> +                     {
> +                             ResetDecoder(state);
> +                             RecPtr = targetPagePtr;
> +                             goto restart;
> +                     }

I think we need to add more validation to this path. What I was proposing
earlier is that we add a new special type of record at the start of an
XLP_FIRST_IS_ABORTED_PARTIAL page, which contains a) lsn of the record we're
aborting, b) checksum of the data up to this point.


Greetings,

Andres Freund


Reply via email to