Hi, Thank you for looking into this.
On Fri, 29 Nov 2024 at 06:55, Kirill Reshke <reshkekir...@gmail.com> wrote: > > + old_blk = &(p->block_info[p->pos - 1]); > > + cur_blk = &(p->block_info[p->pos]); > Should we Assert(p->pos > 0 && p->pos < *something*) I think it is worth adding: + Assert(p->pos > 0 && p->pos < p->max_pos); + + old_blk = &(p->block_info[p->pos - 1]); + cur_blk = &(p->block_info[p->pos]); v4 is attached. -- Regards, Nazir Bilal Yavuz Microsoft
From ffd37cc5010d2253b3f843e891df0e75ee8d27a6 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Tue, 5 Nov 2024 11:40:14 +0300 Subject: [PATCH v4 1/2] Optimize autoprewarm with read streams We've measured 10% performance improvement, and this arranges to benefit automatically from future optimizations to the read_stream subsystem. --- contrib/pg_prewarm/autoprewarm.c | 114 +++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 5 deletions(-) diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index fac4051e1aa..e7a8213209a 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -41,6 +41,7 @@ #include "storage/latch.h" #include "storage/lwlock.h" #include "storage/procsignal.h" +#include "storage/read_stream.h" #include "storage/smgr.h" #include "tcop/tcopprot.h" #include "utils/guc.h" @@ -422,6 +423,70 @@ apw_load_buffers(void) apw_state->prewarmed_blocks, num_elements))); } +struct apw_read_stream_private +{ + bool first_block; + int max_pos; + int pos; + BlockInfoRecord *block_info; + BlockNumber nblocks_in_fork; + +}; + +static BlockNumber +apw_read_stream_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + struct apw_read_stream_private *p = callback_private_data; + bool *rs_have_free_buffer = per_buffer_data; + BlockInfoRecord *old_blk; + BlockInfoRecord *cur_blk; + + *rs_have_free_buffer = true; + + /* + * There may still be queued blocks in the stream even when no free + * buffers are available in the buffer pool. This can lead to unnecessary + * I/O operations and buffer evictions. One possible solution is to + * compare the number of free buffers in the buffer pool with the number + * of queued blocks in the stream. However, this approach is considered a + * workaround and would add complexity with minimal benefit, as only a few + * unnecessary I/O operations and buffer evictions are expected. + * Therefore, this solution has not been implemented. + */ + if (!have_free_buffer()) + { + *rs_have_free_buffer = false; + return InvalidBlockNumber; + } + + if (p->pos == p->max_pos) + return InvalidBlockNumber; + + if (p->first_block) + { + p->first_block = false; + return p->block_info[p->pos++].blocknum; + } + + Assert(p->pos > 0 && p->pos < p->max_pos); + + old_blk = &(p->block_info[p->pos - 1]); + cur_blk = &(p->block_info[p->pos]); + + if (old_blk->database == cur_blk->database && + old_blk->forknum == cur_blk->forknum && + old_blk->filenumber == cur_blk->filenumber && + cur_blk->blocknum < p->nblocks_in_fork) + { + p->pos++; + return cur_blk->blocknum; + } + + return InvalidBlockNumber; +} + /* * Prewarm all blocks for one database (and possibly also global objects, if * those got grouped with this database). @@ -435,6 +500,9 @@ autoprewarm_database_main(Datum main_arg) BlockNumber nblocks = 0; BlockInfoRecord *old_blk = NULL; dsm_segment *seg; + ReadStream *stream = NULL; + struct apw_read_stream_private p; + bool *rs_have_free_buffer; /* Establish signal handlers; once that's done, unblock signals. */ pqsignal(SIGTERM, die); @@ -451,13 +519,16 @@ autoprewarm_database_main(Datum main_arg) block_info = (BlockInfoRecord *) dsm_segment_address(seg); pos = apw_state->prewarm_start_idx; + p.block_info = block_info; + p.max_pos = apw_state->prewarm_stop_idx; + /* * Loop until we run out of blocks to prewarm or until we run out of free * buffers. */ - while (pos < apw_state->prewarm_stop_idx && have_free_buffer()) + for (; pos < apw_state->prewarm_stop_idx; pos++) { - BlockInfoRecord *blk = &block_info[pos++]; + BlockInfoRecord *blk = &block_info[pos]; Buffer buf; CHECK_FOR_INTERRUPTS(); @@ -470,6 +541,18 @@ autoprewarm_database_main(Datum main_arg) old_blk->database != 0) break; + /* + * If stream needs to be created again, end it before closing the old + * relation. + */ + if (stream && (old_blk == NULL || + old_blk->filenumber != blk->filenumber || + old_blk->forknum != blk->forknum)) + { + Assert(read_stream_next_buffer(stream, (void **) &rs_have_free_buffer) == InvalidBuffer); + read_stream_end(stream); + } + /* * As soon as we encounter a block of a new relation, close the old * relation. Note that rel will be NULL if try_relation_open failed @@ -506,7 +589,10 @@ autoprewarm_database_main(Datum main_arg) continue; } - /* Once per fork, check for fork existence and size. */ + /* + * Once per fork, check for fork existence and size. Then create read + * stream if it is suitable. + */ if (old_blk == NULL || old_blk->filenumber != blk->filenumber || old_blk->forknum != blk->forknum) @@ -518,7 +604,21 @@ autoprewarm_database_main(Datum main_arg) if (blk->forknum > InvalidForkNumber && blk->forknum <= MAX_FORKNUM && smgrexists(RelationGetSmgr(rel), blk->forknum)) + { nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum); + + /* Create read stream. */ + p.nblocks_in_fork = nblocks; + p.pos = pos; + p.first_block = true; + stream = read_stream_begin_relation(READ_STREAM_FULL, + NULL, + rel, + blk->forknum, + apw_read_stream_next_block, + &p, + sizeof(bool)); + } else nblocks = 0; } @@ -532,16 +632,20 @@ autoprewarm_database_main(Datum main_arg) } /* Prewarm buffer. */ - buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL, - NULL); + buf = read_stream_next_buffer(stream, (void **) &rs_have_free_buffer); if (BufferIsValid(buf)) { apw_state->prewarmed_blocks++; ReleaseBuffer(buf); } + /* There are no free buffers left in shared buffers, break the loop. */ + else if (!(*rs_have_free_buffer)) + break; old_blk = blk; } + Assert(read_stream_next_buffer(stream, (void **) &rs_have_free_buffer) == InvalidBuffer); + read_stream_end(stream); dsm_detach(seg); -- 2.45.2
From d35f774b03c8436c0e108b1b445ef61c00eb7180 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Wed, 27 Nov 2024 16:53:58 +0300 Subject: [PATCH v4 2/2] Count free buffers at the start of the autoprewarm Streamified version of the autoprewarm code may do unnecessary I/O and buffer evicting. To prevent it at least a little bit, count the number of free buffers in the buffer pool and queue buffers up to that number in the callback function of the autoprewarm. --- src/include/storage/buf_internals.h | 1 + src/backend/storage/buffer/freelist.c | 17 +++++++++++++++++ contrib/pg_prewarm/autoprewarm.c | 4 ++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index eda6c699212..d95050adb23 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -432,6 +432,7 @@ extern void StrategyNotifyBgWriter(int bgwprocno); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); extern bool have_free_buffer(void); +extern int get_number_of_free_buffers(void); /* buf_table.c */ extern Size BufTableShmemSize(int size); diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index dffdd57e9b5..3259e6280cf 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -180,6 +180,23 @@ have_free_buffer(void) return false; } +/* + * get_number_of_free_buffers -- a lockless way to get the number of free + * buffers in buffer pool. + * + * Note that result continuously changes as free buffers are moved out by other + * operations. + */ +int +get_number_of_free_buffers(void) +{ + /* All the buffers are free. */ + if (StrategyControl->firstFreeBuffer < 0) + return NBuffers; + else + return StrategyControl->lastFreeBuffer - StrategyControl->firstFreeBuffer; +} + /* * StrategyGetBuffer * diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index e7a8213209a..1b5a53e63bf 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -520,13 +520,13 @@ autoprewarm_database_main(Datum main_arg) pos = apw_state->prewarm_start_idx; p.block_info = block_info; - p.max_pos = apw_state->prewarm_stop_idx; + p.max_pos = Min(apw_state->prewarm_stop_idx, pos + get_number_of_free_buffers()); /* * Loop until we run out of blocks to prewarm or until we run out of free * buffers. */ - for (; pos < apw_state->prewarm_stop_idx; pos++) + for (; pos < p.max_pos; pos++) { BlockInfoRecord *blk = &block_info[pos]; Buffer buf; -- 2.45.2