Hi, On Fri, 1 Nov 2024 at 21:06, Andrey M. Borodin <x4...@yandex-team.ru> wrote: > > > On 1 Nov 2024, at 12:51, Nazir Bilal Yavuz <byavu...@gmail.com> wrote: > > > > am not > > sure whether 'BufferStrategyControl.lastFreeBuffer - > > BufferStrategyControl.firstFreeBuffer' is safe to use. > > Ugh... it will work. But it seems to me too dirty hack. There's no scalable > way to know size of a free list. > Let's just comment that we might read some more buffers if database does not > fit into memory? > Alternatively we can count size of a free list on the start.
I agree that it is too dirty to hack. There is a minor problem with the counting size of a free list on the start. There may be other processes that fill the buffer pool concurrently, so we can still end up doing unnecessary I/Os. That said, I believe this approach remains an improvement. The first patch includes the comment you suggested, and the second patch implements counting size of a free list on the start. -- Regards, Nazir Bilal Yavuz Microsoft
From bcdb55f237945072cb5740392515af701cfdc1d2 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Tue, 5 Nov 2024 11:40:14 +0300 Subject: [PATCH v2 1/2] Use read stream in autoprewarm Instead of reading blocks with ReadBufferExtended(), create read stream object for each possible case and use it. This change provides about 10% performance improvement. --- contrib/pg_prewarm/autoprewarm.c | 112 +++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 5 deletions(-) diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index fac4051e1aa..8291bf3c427 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -41,6 +41,7 @@ #include "storage/latch.h" #include "storage/lwlock.h" #include "storage/procsignal.h" +#include "storage/read_stream.h" #include "storage/smgr.h" #include "tcop/tcopprot.h" #include "utils/guc.h" @@ -422,6 +423,68 @@ apw_load_buffers(void) apw_state->prewarmed_blocks, num_elements))); } +struct apw_read_stream_private +{ + bool first_block; + int max_pos; + int pos; + BlockInfoRecord *block_info; + BlockNumber nblocks_in_fork; + +}; + +static BlockNumber +apw_read_stream_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + struct apw_read_stream_private *p = callback_private_data; + bool *rs_have_free_buffer = per_buffer_data; + BlockInfoRecord *old_blk; + BlockInfoRecord *cur_blk; + + *rs_have_free_buffer = true; + + /* + * There may still be queued blocks in the stream even when no free + * buffers are available in the buffer pool. This can lead to unnecessary + * I/O operations and buffer evictions. One possible solution is to + * compare the number of free buffers in the buffer pool with the number + * of queued blocks in the stream. However, this approach is considered a + * workaround and would add complexity with minimal benefit, as only a few + * unnecessary I/O operations and buffer evictions are expected. + * Therefore, this solution has not been implemented. + */ + if (!have_free_buffer()) + { + *rs_have_free_buffer = false; + return InvalidBlockNumber; + } + + if (p->pos == p->max_pos) + return InvalidBlockNumber; + + if (p->first_block) + { + p->first_block = false; + return p->block_info[p->pos++].blocknum; + } + + old_blk = &(p->block_info[p->pos - 1]); + cur_blk = &(p->block_info[p->pos]); + + if (old_blk->database == cur_blk->database && + old_blk->forknum == cur_blk->forknum && + old_blk->filenumber == cur_blk->filenumber && + cur_blk->blocknum < p->nblocks_in_fork) + { + p->pos++; + return cur_blk->blocknum; + } + + return InvalidBlockNumber; +} + /* * Prewarm all blocks for one database (and possibly also global objects, if * those got grouped with this database). @@ -435,6 +498,9 @@ autoprewarm_database_main(Datum main_arg) BlockNumber nblocks = 0; BlockInfoRecord *old_blk = NULL; dsm_segment *seg; + ReadStream *stream = NULL; + struct apw_read_stream_private p; + bool *rs_have_free_buffer; /* Establish signal handlers; once that's done, unblock signals. */ pqsignal(SIGTERM, die); @@ -451,13 +517,16 @@ autoprewarm_database_main(Datum main_arg) block_info = (BlockInfoRecord *) dsm_segment_address(seg); pos = apw_state->prewarm_start_idx; + p.block_info = block_info; + p.max_pos = apw_state->prewarm_stop_idx; + /* * Loop until we run out of blocks to prewarm or until we run out of free * buffers. */ - while (pos < apw_state->prewarm_stop_idx && have_free_buffer()) + for (; pos < apw_state->prewarm_stop_idx; pos++) { - BlockInfoRecord *blk = &block_info[pos++]; + BlockInfoRecord *blk = &block_info[pos]; Buffer buf; CHECK_FOR_INTERRUPTS(); @@ -470,6 +539,18 @@ autoprewarm_database_main(Datum main_arg) old_blk->database != 0) break; + /* + * If stream needs to be created again, end it before closing the old + * relation. + */ + if (stream && (old_blk == NULL || + old_blk->filenumber != blk->filenumber || + old_blk->forknum != blk->forknum)) + { + Assert(read_stream_next_buffer(stream, (void **) &rs_have_free_buffer) == InvalidBuffer); + read_stream_end(stream); + } + /* * As soon as we encounter a block of a new relation, close the old * relation. Note that rel will be NULL if try_relation_open failed @@ -506,7 +587,10 @@ autoprewarm_database_main(Datum main_arg) continue; } - /* Once per fork, check for fork existence and size. */ + /* + * Once per fork, check for fork existence and size. Then create read + * stream if it is suitable. + */ if (old_blk == NULL || old_blk->filenumber != blk->filenumber || old_blk->forknum != blk->forknum) @@ -518,7 +602,21 @@ autoprewarm_database_main(Datum main_arg) if (blk->forknum > InvalidForkNumber && blk->forknum <= MAX_FORKNUM && smgrexists(RelationGetSmgr(rel), blk->forknum)) + { nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum); + + /* Create read stream. */ + p.nblocks_in_fork = nblocks; + p.pos = pos; + p.first_block = true; + stream = read_stream_begin_relation(READ_STREAM_FULL, + NULL, + rel, + blk->forknum, + apw_read_stream_next_block, + &p, + sizeof(bool)); + } else nblocks = 0; } @@ -532,16 +630,20 @@ autoprewarm_database_main(Datum main_arg) } /* Prewarm buffer. */ - buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL, - NULL); + buf = read_stream_next_buffer(stream, (void **) &rs_have_free_buffer); if (BufferIsValid(buf)) { apw_state->prewarmed_blocks++; ReleaseBuffer(buf); } + /* There are no free buffers left in shared buffers, break the loop. */ + else if (!(*rs_have_free_buffer)) + break; old_blk = blk; } + Assert(read_stream_next_buffer(stream, (void **) &rs_have_free_buffer) == InvalidBuffer); + read_stream_end(stream); dsm_detach(seg); -- 2.45.2
From 2905f74049f77ab9c4f406dfe28b29ef4eafb7b4 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Tue, 5 Nov 2024 13:00:11 +0300 Subject: [PATCH v2 2/2] Count free buffers at the start of the autoprewarm Streamified version of the autoprewarm code may do unnecessary I/O and buffer evicting. To prevent it at least a little bit, count the number of free buffers in the buffer pool and queue buffers up to that number in the callback function of the autoprewarm. --- src/include/storage/buf_internals.h | 1 + src/backend/storage/buffer/freelist.c | 17 +++++++++++++++++ contrib/pg_prewarm/autoprewarm.c | 4 ++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index eda6c699212..d95050adb23 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -432,6 +432,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); extern bool have_free_buffer(void); +extern int get_number_of_free_buffers(void); /* buf_table.c */ extern Size BufTableShmemSize(int size); diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index dffdd57e9b5..6cad8540bd9 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -180,6 +180,23 @@ have_free_buffer(void) return false; } +/* + * get_number_of_free_buffers -- a lockless way to get the number of free + * buffers in buffer pool. + * + * Note that result continuosly changes as free buffers are moved out by other + * operations. + */ +int +get_number_of_free_buffers(void) +{ + /* All the buffers are free. */ + if (StrategyControl->firstFreeBuffer < 0) + return NBuffers; + else + return StrategyControl->lastFreeBuffer - StrategyControl->firstFreeBuffer; +} + /* * StrategyGetBuffer * diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index 8291bf3c427..16174bf3d0f 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -518,13 +518,13 @@ autoprewarm_database_main(Datum main_arg) pos = apw_state->prewarm_start_idx; p.block_info = block_info; - p.max_pos = apw_state->prewarm_stop_idx; + p.max_pos = Min(apw_state->prewarm_stop_idx, pos + get_number_of_free_buffers()); /* * Loop until we run out of blocks to prewarm or until we run out of free * buffers. */ - for (; pos < apw_state->prewarm_stop_idx; pos++) + for (; pos < p.max_pos; pos++) { BlockInfoRecord *blk = &block_info[pos]; Buffer buf; -- 2.45.2