Hi,

I am working on using the read stream in pg_visibility. There are two
places to use it:

1- collect_visibility_data()

This one is straightforward. I created a read stream object if
'include_pd' is true because read I/O is done when 'include_pd' is
true. There is ~4% timing improvement with this change. I started the
server with the default settings and created a 6 GB table. Then run
100 times pg_visibility() by clearing the OS cache between each run.
----------

2- collect_corrupt_items()

This one is more complicated. The read stream callback function loops
until it finds a suitable block to read. So, if the callback returns
an InvalidBlockNumber; it means that the stream processed all possible
blocks and the stream should be finished. There is ~3% timing
improvement with this change. I started the server with the default
settings and created a 6 GB table. Then run 100 times
pg_check_visible() by clearing the OS cache between each run.

The downside of this approach is there are too many "vmbuffer is valid
but BufferGetBlockNumber(*vmbuffer) is not equal to mapBlock, so
vmbuffer needs to be read again" cases in the read stream version (700
vs 20 for the 6 GB table). This is caused by the callback function of
the read stream reading a new vmbuf while getting next block numbers.
So, vmbuf is wrong when we are checking visibility map bits that might
have changed while we were acquiring the page lock.
----------

Both patches are attached.

Any feedback would be appreciated.

-- 
Regards,
Nazir Bilal Yavuz
Microsoft
From c178e010759f042dc089fd4c5b2283a04b95f017 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Fri, 9 Aug 2024 11:41:52 +0300
Subject: [PATCH v1 1/2] Use read stream in pg_visibility in
 collect_visibility_data()

Instead of reading blocks with ReadBufferExtended() in
collect_visibility_data(), use read stream.

This change provides about 4% performance improvement.
---
 contrib/pg_visibility/pg_visibility.c | 53 ++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 2 deletions(-)

diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 1a1a4ff7be7..10f961c58f0 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -21,6 +21,7 @@
 #include "storage/bufmgr.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
+#include "storage/read_stream.h"
 #include "storage/smgr.h"
 #include "utils/rel.h"
 #include "utils/snapmgr.h"
@@ -41,6 +42,16 @@ typedef struct corrupt_items
 	ItemPointer tids;
 } corrupt_items;
 
+/*
+ * Helper struct for read stream object used in
+ * collect_visibility_data() function.
+ */
+struct collect_visibility_data_read_stream_private
+{
+	BlockNumber blocknum;
+	BlockNumber nblocks;
+};
+
 PG_FUNCTION_INFO_V1(pg_visibility_map);
 PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
 PG_FUNCTION_INFO_V1(pg_visibility);
@@ -463,6 +474,23 @@ pg_visibility_tupdesc(bool include_blkno, bool include_pd)
 	return BlessTupleDesc(tupdesc);
 }
 
+/*
+ * Callback function to get next block for read stream object used in
+ * collect_visibility_data() function.
+ */
+static BlockNumber
+collect_visibility_data_read_stream_next_block(ReadStream *stream,
+											   void *callback_private_data,
+											   void *per_buffer_data)
+{
+	struct collect_visibility_data_read_stream_private *p = callback_private_data;
+
+	if (p->blocknum < p->nblocks)
+		return p->blocknum++;
+
+	return InvalidBlockNumber;
+}
+
 /*
  * Collect visibility data about a relation.
  *
@@ -478,6 +506,8 @@ collect_visibility_data(Oid relid, bool include_pd)
 	BlockNumber blkno;
 	Buffer		vmbuffer = InvalidBuffer;
 	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+	struct collect_visibility_data_read_stream_private p;
+	ReadStream *stream = NULL;
 
 	rel = relation_open(relid, AccessShareLock);
 
@@ -489,6 +519,20 @@ collect_visibility_data(Oid relid, bool include_pd)
 	info->next = 0;
 	info->count = nblocks;
 
+	/* Crate a read stream if read will be done. */
+	if (include_pd)
+	{
+		p.blocknum = 0;
+		p.nblocks = nblocks;
+		stream = read_stream_begin_relation(READ_STREAM_FULL,
+											bstrategy,
+											rel,
+											MAIN_FORKNUM,
+											collect_visibility_data_read_stream_next_block,
+											&p,
+											0);
+	}
+
 	for (blkno = 0; blkno < nblocks; ++blkno)
 	{
 		int32		mapbits;
@@ -513,8 +557,7 @@ collect_visibility_data(Oid relid, bool include_pd)
 			Buffer		buffer;
 			Page		page;
 
-			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
-										bstrategy);
+			buffer = read_stream_next_buffer(stream, NULL);
 			LockBuffer(buffer, BUFFER_LOCK_SHARE);
 
 			page = BufferGetPage(buffer);
@@ -525,6 +568,12 @@ collect_visibility_data(Oid relid, bool include_pd)
 		}
 	}
 
+	if (include_pd && stream)
+	{
+		Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+		read_stream_end(stream);
+	}
+
 	/* Clean up. */
 	if (vmbuffer != InvalidBuffer)
 		ReleaseBuffer(vmbuffer);
-- 
2.45.2

From 456d1df641fd6e81658fd9288e1f81b9ce068708 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Tue, 13 Aug 2024 13:03:39 +0300
Subject: [PATCH v1 2/2] Use read stream in pg_visibility in
 collect_corrupt_items()

Instead of reading blocks with ReadBufferExtended() in
collect_corrupt_items(), use read stream.

This change provides about 3% performance improvement.
---
 contrib/pg_visibility/pg_visibility.c | 93 +++++++++++++++++++++++----
 1 file changed, 81 insertions(+), 12 deletions(-)

diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
index 10f961c58f0..4729ee95991 100644
--- a/contrib/pg_visibility/pg_visibility.c
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -52,6 +52,20 @@ struct collect_visibility_data_read_stream_private
 	BlockNumber nblocks;
 };
 
+/*
+ * Helper struct for read stream object used in
+ * collect_corrupt_items() function.
+ */
+struct collect_corrupt_items_read_stream_private
+{
+	bool		all_frozen;
+	bool		all_visible;
+	BlockNumber blocknum;
+	BlockNumber nblocks;
+	Relation	rel;
+	Buffer	   *vmbuffer;
+};
+
 PG_FUNCTION_INFO_V1(pg_visibility_map);
 PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
 PG_FUNCTION_INFO_V1(pg_visibility);
@@ -639,6 +653,35 @@ GetStrictOldestNonRemovableTransactionId(Relation rel)
 	}
 }
 
+/*
+ * Callback function to get next block for read stream object used in
+ * collect_corrupt_items() function.
+ */
+static BlockNumber
+collect_corrupt_items_read_stream_next_block(ReadStream *stream,
+											 void *callback_private_data,
+											 void *per_buffer_data)
+{
+	struct collect_corrupt_items_read_stream_private *p = callback_private_data;
+
+	for (; p->blocknum < p->nblocks; p->blocknum++)
+	{
+		bool		check_frozen = false;
+		bool		check_visible = false;
+
+		if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->blocknum, p->vmbuffer))
+			check_frozen = true;
+		if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->blocknum, p->vmbuffer))
+			check_visible = true;
+		if (!check_visible && !check_frozen)
+			continue;
+
+		return p->blocknum++;
+	}
+
+	return InvalidBlockNumber;
+}
+
 /*
  * Returns a list of items whose visibility map information does not match
  * the status of the tuples on the page.
@@ -663,6 +706,10 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 	Buffer		vmbuffer = InvalidBuffer;
 	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
 	TransactionId OldestXmin = InvalidTransactionId;
+	struct collect_corrupt_items_read_stream_private p;
+	ReadStream *stream;
+
+	Assert(all_visible || all_frozen);
 
 	rel = relation_open(relid, AccessShareLock);
 
@@ -687,6 +734,20 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 	items->count = 64;
 	items->tids = palloc(items->count * sizeof(ItemPointerData));
 
+	p.blocknum = 0;
+	p.nblocks = nblocks;
+	p.rel = rel;
+	p.vmbuffer = &vmbuffer;
+	p.all_frozen = all_frozen;
+	p.all_visible = all_visible;
+	stream = read_stream_begin_relation(READ_STREAM_FULL,
+										bstrategy,
+										rel,
+										MAIN_FORKNUM,
+										collect_corrupt_items_read_stream_next_block,
+										&p,
+										0);
+
 	/* Loop over every block in the relation. */
 	for (blkno = 0; blkno < nblocks; ++blkno)
 	{
@@ -700,17 +761,20 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 		/* Make sure we are interruptible. */
 		CHECK_FOR_INTERRUPTS();
 
-		/* Use the visibility map to decide whether to check this page. */
-		if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
-			check_frozen = true;
-		if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
-			check_visible = true;
-		if (!check_visible && !check_frozen)
-			continue;
-
 		/* Read and lock the page. */
-		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
-									bstrategy);
+		buffer = read_stream_next_buffer(stream, NULL);
+
+		/*
+		 * If the read stream returns an InvalidBuffer, this means all the
+		 * blocks are processed. So, end the stream and loop.
+		 */
+		if (buffer == InvalidBuffer)
+		{
+			read_stream_end(stream);
+			stream = NULL;
+			break;
+		}
+
 		LockBuffer(buffer, BUFFER_LOCK_SHARE);
 
 		page = BufferGetPage(buffer);
@@ -720,9 +784,9 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 		 * The visibility map bits might have changed while we were acquiring
 		 * the page lock.  Recheck to avoid returning spurious results.
 		 */
-		if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+		if (all_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
 			check_frozen = false;
-		if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+		if (all_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
 			check_visible = false;
 		if (!check_visible && !check_frozen)
 		{
@@ -807,6 +871,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
 
 		UnlockReleaseBuffer(buffer);
 	}
+	if (stream)
+	{
+		Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+		read_stream_end(stream);
+	}
 
 	/* Clean up. */
 	if (vmbuffer != InvalidBuffer)
-- 
2.45.2

Reply via email to