On Tue, Feb 28, 2023 at 5:52 AM Nathan Bossart <nathandboss...@gmail.com> wrote:
>
> On Tue, Feb 07, 2023 at 07:30:00PM +0530, Bharath Rupireddy wrote:
> > +             /*
> > +              * Try updating oldest initialized XLog buffer page.
> > +              *
> > +              * Update it if we are initializing an XLog buffer page for the first
> > +              * time or if XLog buffers are full and we are wrapping around.
> > +              */
> > +             if (XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage) ||
> > +                     (!XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage) &&
> > +                      XLogRecPtrToBufIdx(XLogCtl->OldestInitializedPage) == nextidx))
> > +             {
> > +                     Assert(XLogCtl->OldestInitializedPage < NewPageBeginPtr);
> > +
> > +                     XLogCtl->OldestInitializedPage = NewPageBeginPtr;
> > +             }
>
> nitpick: I think you can simplify the conditional to
>
>         if (XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage) ||
>                 XLogRecPtrToBufIdx(XLogCtl->OldestInitializedPage) == nextidx)

Oh, yes, done that.

> It's confusing to me that OldestInitializedPage is set to NewPageBeginPtr.
> Doesn't that set it to the beginning of the newest initialized page?

Yes, that's the intention, see below. OldestInitializedPage points to
the start address of the oldest initialized page, whereas
InitializedUpTo points to the end address of the latest initialized
page. With this, one can easily track all the WAL between
OldestInitializedPage and InitializedUpTo.

+        /*
+         * OldestInitializedPage and InitializedUpTo are always starting and
+         * ending addresses of (same or different) XLog buffer page
+         * respectively. Hence, they can never be same even if there's only one
+         * initialized page in XLog buffers.
+         */
+        Assert(XLogCtl->OldestInitializedPage != XLogCtl->InitializedUpTo);

Thanks for looking at it. I'm attaching the v2 patch with the above
review comment addressed for further review.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From b20ddef1ca852f86cf309417e598f02ff91ff946 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Tue, 28 Feb 2023 05:22:20 +0000
Subject: [PATCH v2] Track Oldest Initialized WAL Buffer Page

---
 src/backend/access/transam/xlog.c | 169 ++++++++++++++++++++++++++++++
 src/include/access/xlog.h         |   1 +
 2 files changed, 170 insertions(+)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d..f4531d3fb5 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -504,6 +504,44 @@ typedef struct XLogCtlData
 	XLogRecPtr *xlblocks;		/* 1st byte ptr-s + XLOG_BLCKSZ */
 	int			XLogCacheBlck;	/* highest allocated xlog buffer index */
 
+	/*
+	 * Start address of oldest initialized page in XLog buffers.
+	 *
+	 * We mainly track oldest initialized page explicitly to quickly tell if a
+	 * given WAL record is available in XLog buffers. It also can be used for
+	 * other purposes, see notes below.
+	 *
+	 * OldestInitializedPage gives XLog buffers following properties:
+	 *
+	 * 1) At any given point of time, pages in XLog buffers array are sorted in
+	 * an ascending order from OldestInitializedPage till InitializedUpTo.
+	 * Note that we verify this property for assert-only builds, see
+	 * IsXLogBuffersArraySorted() for more details.
+	 *
+	 * 2) OldestInitializedPage is monotonically increasing (by virtue of how
+	 * postgres generates WAL records), that is, its value never decreases.
+	 * This property lets someone read its value without a lock. There's no
+	 * problem even if its value is slightly stale i.e. concurrently being
+	 * updated. One can still use it for finding if a given WAL record is
+	 * available in XLog buffers. At worst, one might get false positives (i.e.
+	 * OldestInitializedPage may tell that the WAL record is available in XLog
+	 * buffers, but when one actually looks at it, it isn't really available).
+	 * This is more efficient and performant than acquiring a lock for reading.
+	 * Note that we may not need a lock to read OldestInitializedPage but we
+	 * need to update it holding WALBufMappingLock.
+	 *
+	 * 3) One can start traversing XLog buffers from OldestInitializedPage till
+	 * InitializedUpTo to list out all valid WAL records and stats, and expose
+	 * them via SQL-callable functions to users.
+	 *
+	 * 4) XLog buffers array is inherently organized as a circular, sorted and
+	 * rotated array with OldestInitializedPage as the pivot: the LSN of the
+	 * previous buffer page (if valid) and the LSN of the next buffer page
+	 * (if valid) are both greater than the LSN that OldestInitializedPage
+	 * points to.
+	 */
+	XLogRecPtr	OldestInitializedPage;
+
 	/*
 	 * InsertTimeLineID is the timeline into which new WAL is being inserted
 	 * and flushed. It is zero during recovery, and does not change once set.
@@ -580,6 +618,10 @@ static ControlFileData *ControlFile = NULL;
 #define NextBufIdx(idx)		\
 		(((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
+/* Retreat to the previous buffer index; index 0 wraps to XLogCacheBlck. */
+#define PreviousBufIdx(idx)		\
+		(((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))
+
 /*
  * XLogRecPtrToBufIdx returns the index of the WAL buffer that holds, or
  * would hold if it was in cache, the page containing 'recptr'.
@@ -698,6 +740,10 @@ static void WALInsertLockAcquireExclusive(void);
 static void WALInsertLockRelease(void);
 static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
 
+#ifdef USE_ASSERT_CHECKING
+static bool IsXLogBuffersArraySorted(void);
+#endif
+
 /*
  * Insert an XLOG record represented by an already-constructed chain of data
  * chunks.  This is a low-level routine; to construct the WAL record header
@@ -1925,6 +1971,52 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 		XLogCtl->InitializedUpTo = NewPageEndPtr;
 
 		npages++;
+
+		/*
+		 * Try updating oldest initialized XLog buffer page.
+		 *
+		 * Update it if we are initializing an XLog buffer page for the first
+		 * time or if XLog buffers are full and we are wrapping around.
+		 */
+		if (XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage) ||
+			XLogRecPtrToBufIdx(XLogCtl->OldestInitializedPage) == nextidx)
+		{
+			Assert(XLogCtl->OldestInitializedPage < NewPageBeginPtr);
+
+			XLogCtl->OldestInitializedPage = NewPageBeginPtr;
+		}
+
+		/*
+		 * Check some properties about XLog buffers array. We essentially
+		 * perform these checks as asserts to avoid extra costs.
+		 *
+		 * XXX: Perhaps these extra checks are too much for an assert build, so
+		 * placing them under WAL_DEBUG might be worth trying.
+		 */
+
+		/* OldestInitializedPage must have already been initialized. */
+		Assert(!XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage));
+
+		/*
+		 * OldestInitializedPage is always a starting address of XLog buffer
+		 * page.
+		 */
+		Assert((XLogCtl->OldestInitializedPage % XLOG_BLCKSZ) == 0);
+
+		/*
+		 * OldestInitializedPage and InitializedUpTo are always starting and
+		 * ending addresses of (same or different) XLog buffer page
+		 * respectively. Hence, they can never be same even if there's only one
+		 * initialized page in XLog buffers.
+		 */
+		Assert(XLogCtl->OldestInitializedPage != XLogCtl->InitializedUpTo);
+
+		/*
+		 * At any given point of time, pages in XLog buffers array are sorted
+		 * in an ascending order from OldestInitializedPage till
+		 * InitializedUpTo.
+		 */
+		Assert(IsXLogBuffersArraySorted());
 	}
 	LWLockRelease(WALBufMappingLock);
 
@@ -4616,6 +4708,7 @@ XLOGShmemInit(void)
 	XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
 	XLogCtl->InstallXLogFileSegmentActive = false;
 	XLogCtl->WalWriterSleeping = false;
+	XLogCtl->OldestInitializedPage = InvalidXLogRecPtr;
 
 	SpinLockInit(&XLogCtl->Insert.insertpos_lck);
 	SpinLockInit(&XLogCtl->info_lck);
@@ -5622,6 +5715,14 @@ StartupXLOG(void)
 
 		XLogCtl->xlblocks[firstIdx] = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
 		XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
+		XLogCtl->OldestInitializedPage = endOfRecoveryInfo->lastPageBeginPtr;
+
+		/*
+		 * OldestInitializedPage is always a starting address of XLog buffer
+		 * page.
+		 */
+		Assert(!XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage));
+		Assert((XLogCtl->OldestInitializedPage % XLOG_BLCKSZ) == 0);
 	}
 	else
 	{
@@ -8931,3 +9032,71 @@ SetWalWriterSleeping(bool sleeping)
 	XLogCtl->WalWriterSleeping = sleeping;
 	SpinLockRelease(&XLogCtl->info_lck);
 }
+
+#ifdef USE_ASSERT_CHECKING
+/*
+ * Returns whether or not XLog buffers array is sorted.
+ *
+ * XXX: Perhaps this function is too much for an assert build, so placing it
+ * under WAL_DEBUG might be worth trying.
+ */
+static bool
+IsXLogBuffersArraySorted(void)
+{
+	int			oldest_idx;
+	int			newest_idx;
+	int			idx;
+
+	/* Buffer slots holding the oldest and the newest initialized pages. */
+	oldest_idx = XLogRecPtrToBufIdx(XLogCtl->OldestInitializedPage);
+	newest_idx = XLogRecPtrToBufIdx(XLogCtl->InitializedUpTo - XLOG_BLCKSZ);
+
+	/*
+	 * Walk the circular array from the oldest slot up to, but excluding, the
+	 * newest one, comparing each slot against its successor. When both
+	 * slots coincide the loop body never runs and the array is trivially
+	 * sorted.
+	 */
+	for (idx = oldest_idx; idx != newest_idx; idx = NextBufIdx(idx))
+	{
+		XLogRecPtr	this_page = XLogCtl->xlblocks[idx];
+		XLogRecPtr	following_page = XLogCtl->xlblocks[NextBufIdx(idx)];
+
+		/* A successor with an invalid pointer cannot break the ordering. */
+		if (XLogRecPtrIsInvalid(following_page))
+			continue;
+
+		/* Out of order as soon as a slot exceeds its valid successor. */
+		if (this_page > following_page)
+			return false;
+	}
+
+	/* The walk must have stopped exactly on the newest slot. */
+	Assert(XLogCtl->xlblocks[idx] == XLogCtl->xlblocks[newest_idx]);
+
+	return true;
+}
+#endif
+
+/*
+ * Returns whether or not a given WAL record is available in XLog buffers.
+ *
+ * Note that we don't read OldestInitializedPage under a lock, see description
+ * near its definition in xlog.c for more details.
+ *
+ * Note that caller needs to pass in an LSN known to the server, not a future
+ * or unwritten or unflushed LSN.
+ */
+bool
+IsWALRecordAvailableInXLogBuffers(XLogRecPtr lsn)
+{
+	/* An invalid LSN, or no tracked page yet, means nothing is available. */
+	if (XLogRecPtrIsInvalid(lsn) ||
+		XLogRecPtrIsInvalid(XLogCtl->OldestInitializedPage))
+		return false;
+
+	/* Available iff lsn lies in [OldestInitializedPage, InitializedUpTo). */
+	if (lsn < XLogCtl->OldestInitializedPage)
+		return false;
+	return (lsn < XLogCtl->InitializedUpTo);
+}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index cfe5409738..2afa53008e 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -257,6 +257,7 @@ extern void ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli);
 extern void SetInstallXLogFileSegmentActive(void);
 extern bool IsInstallXLogFileSegmentActive(void);
 extern void XLogShutdownWalRcv(void);
+extern bool	IsWALRecordAvailableInXLogBuffers(XLogRecPtr lsn);
 
 /*
  * Routines to start, stop, and get status of a base backup.
-- 
2.34.1

Reply via email to