Hi,

Thank you for looking into this!

On Wed, 27 Nov 2024 at 16:50, Matheus Alcantara <mths....@pm.me> wrote:
> I've executed the same test of 5 databases with each of them having 1 table of
> 3GB of size and I've got very similar results.
>
> I've also tested using a single database with 4 tables with ~60GB of size and
> the results compared with master was more closer but still an improvement. 
> Note
> that I've also increased the default shared_buffers to 7GB to see how it works
> with large buffer pools.
>   - patched: 5.4259 s
>   - master: 5.53186 s

Thanks for the testing.

> Not to much to say about the code, I'm currently learning more about the read
> stream API and Postgresql hacking itself. Just some minor points and questions
> about the patches.
>
>
> v2-0002-Count-free-buffers-at-the-start-of-the-autoprewar.patch
> --- a/src/backend/storage/buffer/freelist.c
> +/*
> + * get_number_of_free_buffers -- a lockless way to get the number of free
> + *                                                              buffers in 
> buffer pool.
> + *
> + * Note that result continuosly changes as free buffers are moved out by 
> other
> + * operations.
> + */
> +int
> +get_number_of_free_buffers(void)
>
> typo on continuosly -> continuously

Done.

> v2-0001-Use-read-stream-in-autoprewarm.patch
> +       bool       *rs_have_free_buffer = per_buffer_data;
> +
> +
> +       *rs_have_free_buffer = true;
> +
>
> Not sure if I understand why this variable is needed, it seems that it is only
> written and never read? Just as comparison, the block_range_read_stream_cb
> callback used on pg_prewarm seems to not use the per_buffer_data parameter.

Actually, it is read in the main loop of the
autoprewarm_database_main() function:

        /* There are no free buffers left in shared buffers, break the loop. */
        else if (!(*rs_have_free_buffer))
            break;

apw_read_stream_next_block() callback function sets
rs_have_free_buffer's value to false when there is no free buffer left
in the shared buffers. And the code above terminates the main loop in
the autoprewarm_database_main() function when it is set to false.

block_range_read_stream_cb() callback is used when the callback only
needs to loop over the block numbers. However, for the autoprewarm
case; the callback function needs to do additional checks so another
callback and the use of this variable are required.

v3 is attached.

--
Regards,
Nazir Bilal Yavuz
Microsoft
From f5a8faa6ac3f836784c15b776633e7729d95378d Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Tue, 5 Nov 2024 11:40:14 +0300
Subject: [PATCH v3 1/2] Use read stream in autoprewarm

Instead of reading blocks with ReadBufferExtended(), create read stream
object for each possible case and use it.

This change provides about 10% performance improvement.
---
 contrib/pg_prewarm/autoprewarm.c | 112 +++++++++++++++++++++++++++++--
 1 file changed, 107 insertions(+), 5 deletions(-)

diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index fac4051e1aa..8291bf3c427 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -41,6 +41,7 @@
 #include "storage/latch.h"
 #include "storage/lwlock.h"
 #include "storage/procsignal.h"
+#include "storage/read_stream.h"
 #include "storage/smgr.h"
 #include "tcop/tcopprot.h"
 #include "utils/guc.h"
@@ -422,6 +423,68 @@ apw_load_buffers(void)
 						apw_state->prewarmed_blocks, num_elements)));
 }
 
+struct apw_read_stream_private
+{
+	bool		first_block;
+	int			max_pos;
+	int			pos;
+	BlockInfoRecord *block_info;
+	BlockNumber nblocks_in_fork;
+
+};
+
+static BlockNumber
+apw_read_stream_next_block(ReadStream *stream,
+						   void *callback_private_data,
+						   void *per_buffer_data)
+{
+	struct apw_read_stream_private *p = callback_private_data;
+	bool	   *rs_have_free_buffer = per_buffer_data;
+	BlockInfoRecord *old_blk;
+	BlockInfoRecord *cur_blk;
+
+	*rs_have_free_buffer = true;
+
+	/*
+	 * There may still be queued blocks in the stream even when no free
+	 * buffers are available in the buffer pool. This can lead to unnecessary
+	 * I/O operations and buffer evictions. One possible solution is to
+	 * compare the number of free buffers in the buffer pool with the number
+	 * of queued blocks in the stream. However, this approach is considered a
+	 * workaround and would add complexity with minimal benefit, as only a few
+	 * unnecessary I/O operations and buffer evictions are expected.
+	 * Therefore, this solution has not been implemented.
+	 */
+	if (!have_free_buffer())
+	{
+		*rs_have_free_buffer = false;
+		return InvalidBlockNumber;
+	}
+
+	if (p->pos == p->max_pos)
+		return InvalidBlockNumber;
+
+	if (p->first_block)
+	{
+		p->first_block = false;
+		return p->block_info[p->pos++].blocknum;
+	}
+
+	old_blk = &(p->block_info[p->pos - 1]);
+	cur_blk = &(p->block_info[p->pos]);
+
+	if (old_blk->database == cur_blk->database &&
+		old_blk->forknum == cur_blk->forknum &&
+		old_blk->filenumber == cur_blk->filenumber &&
+		cur_blk->blocknum < p->nblocks_in_fork)
+	{
+		p->pos++;
+		return cur_blk->blocknum;
+	}
+
+	return InvalidBlockNumber;
+}
+
 /*
  * Prewarm all blocks for one database (and possibly also global objects, if
  * those got grouped with this database).
@@ -435,6 +498,9 @@ autoprewarm_database_main(Datum main_arg)
 	BlockNumber nblocks = 0;
 	BlockInfoRecord *old_blk = NULL;
 	dsm_segment *seg;
+	ReadStream *stream = NULL;
+	struct apw_read_stream_private p;
+	bool	   *rs_have_free_buffer;
 
 	/* Establish signal handlers; once that's done, unblock signals. */
 	pqsignal(SIGTERM, die);
@@ -451,13 +517,16 @@ autoprewarm_database_main(Datum main_arg)
 	block_info = (BlockInfoRecord *) dsm_segment_address(seg);
 	pos = apw_state->prewarm_start_idx;
 
+	p.block_info = block_info;
+	p.max_pos = apw_state->prewarm_stop_idx;
+
 	/*
 	 * Loop until we run out of blocks to prewarm or until we run out of free
 	 * buffers.
 	 */
-	while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
+	for (; pos < apw_state->prewarm_stop_idx; pos++)
 	{
-		BlockInfoRecord *blk = &block_info[pos++];
+		BlockInfoRecord *blk = &block_info[pos];
 		Buffer		buf;
 
 		CHECK_FOR_INTERRUPTS();
@@ -470,6 +539,18 @@ autoprewarm_database_main(Datum main_arg)
 			old_blk->database != 0)
 			break;
 
+		/*
+		 * If stream needs to be created again, end it before closing the old
+		 * relation.
+		 */
+		if (stream && (old_blk == NULL ||
+					   old_blk->filenumber != blk->filenumber ||
+					   old_blk->forknum != blk->forknum))
+		{
+			Assert(read_stream_next_buffer(stream, (void **) &rs_have_free_buffer) == InvalidBuffer);
+			read_stream_end(stream);
+		}
+
 		/*
 		 * As soon as we encounter a block of a new relation, close the old
 		 * relation. Note that rel will be NULL if try_relation_open failed
@@ -506,7 +587,10 @@ autoprewarm_database_main(Datum main_arg)
 			continue;
 		}
 
-		/* Once per fork, check for fork existence and size. */
+		/*
+		 * Once per fork, check for fork existence and size. Then create read
+		 * stream if it is suitable.
+		 */
 		if (old_blk == NULL ||
 			old_blk->filenumber != blk->filenumber ||
 			old_blk->forknum != blk->forknum)
@@ -518,7 +602,21 @@ autoprewarm_database_main(Datum main_arg)
 			if (blk->forknum > InvalidForkNumber &&
 				blk->forknum <= MAX_FORKNUM &&
 				smgrexists(RelationGetSmgr(rel), blk->forknum))
+			{
 				nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
+
+				/* Create read stream. */
+				p.nblocks_in_fork = nblocks;
+				p.pos = pos;
+				p.first_block = true;
+				stream = read_stream_begin_relation(READ_STREAM_FULL,
+													NULL,
+													rel,
+													blk->forknum,
+													apw_read_stream_next_block,
+													&p,
+													sizeof(bool));
+			}
 			else
 				nblocks = 0;
 		}
@@ -532,16 +630,20 @@ autoprewarm_database_main(Datum main_arg)
 		}
 
 		/* Prewarm buffer. */
-		buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
-								 NULL);
+		buf = read_stream_next_buffer(stream, (void **) &rs_have_free_buffer);
 		if (BufferIsValid(buf))
 		{
 			apw_state->prewarmed_blocks++;
 			ReleaseBuffer(buf);
 		}
+		/* There are no free buffers left in shared buffers, break the loop. */
+		else if (!(*rs_have_free_buffer))
+			break;
 
 		old_blk = blk;
 	}
+	Assert(read_stream_next_buffer(stream, (void **) &rs_have_free_buffer) == InvalidBuffer);
+	read_stream_end(stream);
 
 	dsm_detach(seg);
 
-- 
2.45.2

From 15345de76b1006cb421d84abe3b03fbe60c754e4 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Wed, 27 Nov 2024 16:53:58 +0300
Subject: [PATCH v3 2/2] Count free buffers at the start of the autoprewarm

Streamified version of the autoprewarm code may do unnecessary I/O and
buffer evicting. To prevent it at least a little bit, count the number
of free buffers in the buffer pool and queue buffers up to that number
in the callback function of the autoprewarm.
---
 src/include/storage/buf_internals.h   |  1 +
 src/backend/storage/buffer/freelist.c | 17 +++++++++++++++++
 contrib/pg_prewarm/autoprewarm.c      |  4 ++--
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index eda6c699212..d95050adb23 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -432,6 +432,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
 extern bool have_free_buffer(void);
+extern int	get_number_of_free_buffers(void);
 
 /* buf_table.c */
 extern Size BufTableShmemSize(int size);
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index dffdd57e9b5..3259e6280cf 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -180,6 +180,23 @@ have_free_buffer(void)
 		return false;
 }
 
+/*
+ * get_number_of_free_buffers -- a lockless way to get the number of free
+ *								 buffers in buffer pool.
+ *
+ * Note that result continuously changes as free buffers are moved out by other
+ * operations.
+ */
+int
+get_number_of_free_buffers(void)
+{
+	/* All the buffers are free. */
+	if (StrategyControl->firstFreeBuffer < 0)
+		return NBuffers;
+	else
+		return StrategyControl->lastFreeBuffer - StrategyControl->firstFreeBuffer;
+}
+
 /*
  * StrategyGetBuffer
  *
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c
index 8291bf3c427..16174bf3d0f 100644
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -518,13 +518,13 @@ autoprewarm_database_main(Datum main_arg)
 	pos = apw_state->prewarm_start_idx;
 
 	p.block_info = block_info;
-	p.max_pos = apw_state->prewarm_stop_idx;
+	p.max_pos = Min(apw_state->prewarm_stop_idx, pos + get_number_of_free_buffers());
 
 	/*
 	 * Loop until we run out of blocks to prewarm or until we run out of free
 	 * buffers.
 	 */
-	for (; pos < apw_state->prewarm_stop_idx; pos++)
+	for (; pos < p.max_pos; pos++)
 	{
 		BlockInfoRecord *blk = &block_info[pos];
 		Buffer		buf;
-- 
2.45.2

Reply via email to