Hi, On Sat, 31 Aug 2024 at 02:51, Noah Misch <n...@leadboat.com> wrote: > > To read blocks 10 and 11, I would expect to initialize the struct with one of: > > { .first=10, .nblocks=2 } > { .first=10, .last_inclusive=11 } > { .first=10, .last_exclusive=12 } > > With the patch's API, I would need {.first=10,.nblocks=12}. The struct field > named "nblocks" behaves like a last_block_exclusive. Please either make the > behavior an "nblocks" behavior or change the field name to replace the term > "nblocks" with something matching the behavior. (I used longer field names in > my examples here, to disambiguate those examples. It's okay if the final > field names aren't those, as long as the field names and the behavior align.)
I decided to use 'current_blocknum' and 'last_exclusive'. I think these are easier to understand and use. > > Thanks for the information, I will check these. What I still do not > > understand is how to make sure that only the second block is processed > > and the first one is skipped. pg_check_visible() and pg_check_frozen() > > returns TIDs that cause corruption in the visibility map, there is no > > information about block numbers. > > I see what you're saying. collect_corrupt_items() needs a corrupt table to > report anything; all corruption-free tables get the same output. Testing this > would need extra C code or techniques like corrupt_page_checksum() to create > the corrupt state. That wouldn't be a bad thing to have, but it's big enough > that I'll consider it out of scope for $SUBJECT. With the callback change > above, I'll be ready to push all this. Thanks, updated patches are attached. -- Regards, Nazir Bilal Yavuz Microsoft
From 1dbdeacc54c3575a2cb82e95aab5b335b0fa1d2f Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavuz81@gmail.com> Date: Mon, 26 Aug 2024 12:12:52 +0300 Subject: [PATCH v4 1/5] Add general-use struct and callback to read stream Number of callbacks that just iterates over block range in read stream are increasing. So, add general-use struct and callback to read stream. --- src/include/storage/read_stream.h | 13 +++++++++++++ src/backend/storage/aio/read_stream.c | 21 +++++++++++++++++++-- src/tools/pgindent/typedefs.list | 1 + 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/include/storage/read_stream.h b/src/include/storage/read_stream.h index 4e599904f26..0a2398fd7df 100644 --- a/src/include/storage/read_stream.h +++ b/src/include/storage/read_stream.h @@ -45,11 +45,24 @@ struct ReadStream; typedef struct ReadStream ReadStream; +/* + * General-use struct to use in callback functions for block range scans. + * Callback loops between current_blocknum (inclusive) and last_exclusive. + */ +typedef struct BlockRangeReadStreamPrivate +{ + BlockNumber current_blocknum; + BlockNumber last_exclusive; +} BlockRangeReadStreamPrivate; + /* Callback that returns the next block number to read. */ typedef BlockNumber (*ReadStreamBlockNumberCB) (ReadStream *stream, void *callback_private_data, void *per_buffer_data); +extern BlockNumber block_range_read_stream_cb(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data); extern ReadStream *read_stream_begin_relation(int flags, BufferAccessStrategy strategy, Relation rel, diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c index 93cdd35fea0..8a449bab8a0 100644 --- a/src/backend/storage/aio/read_stream.c +++ b/src/backend/storage/aio/read_stream.c @@ -164,8 +164,25 @@ get_per_buffer_data(ReadStream *stream, int16 buffer_index) } /* - * Ask the callback which block it would like us to read next, with a one block - * buffer in front to allow read_stream_unget_block() to work. + * General-use callback function for block range scans. + */ +BlockNumber +block_range_read_stream_cb(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + BlockRangeReadStreamPrivate *p = callback_private_data; + + if (p->current_blocknum < p->last_exclusive) + return p->current_blocknum++; + + return InvalidBlockNumber; +} + +/* + * Ask the callback which block it would like us to read next, with a small + * buffer in front to allow read_stream_unget_block() to work and to allow the + * fast path to skip this function and work directly from the array. */ static inline BlockNumber read_stream_get_block(ReadStream *stream, void *per_buffer_data) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9e951a9e6f3..df3f336bec0 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -275,6 +275,7 @@ BlockId BlockIdData BlockInfoRecord BlockNumber +BlockRangeReadStreamPrivate BlockRefTable BlockRefTableBuffer BlockRefTableChunk -- 2.45.2
From 84bc78cc36814309634c3686827b8a222bdfbfef Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavuz81@gmail.com> Date: Mon, 26 Aug 2024 12:16:06 +0300 Subject: [PATCH v4 2/5] pg_prewarm: Use generic-use read stream struct and callback Instead of creating another read stream struct and callback, use generic one that is exported from read stream. --- contrib/pg_prewarm/pg_prewarm.c | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index 5c859e983c5..243d36c46e8 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -39,25 +39,6 @@ typedef enum static PGIOAlignedBlock blockbuffer; -struct pg_prewarm_read_stream_private -{ - BlockNumber blocknum; - int64 last_block; -}; - -static BlockNumber -pg_prewarm_read_stream_next_block(ReadStream *stream, - void *callback_private_data, - void *per_buffer_data) -{ - struct pg_prewarm_read_stream_private *p = callback_private_data; - - if (p->blocknum <= p->last_block) - return p->blocknum++; - - return InvalidBlockNumber; -} - /* * pg_prewarm(regclass, mode text, fork text, * first_block int8, last_block int8) @@ -203,7 +184,7 @@ pg_prewarm(PG_FUNCTION_ARGS) } else if (ptype == PREWARM_BUFFER) { - struct pg_prewarm_read_stream_private p; + BlockRangeReadStreamPrivate p; ReadStream *stream; /* @@ -211,14 +192,14 @@ pg_prewarm(PG_FUNCTION_ARGS) */ /* Set up the private state for our streaming buffer read callback. */ - p.blocknum = first_block; - p.last_block = last_block; + p.current_blocknum = first_block; + p.last_exclusive = last_block + 1; stream = read_stream_begin_relation(READ_STREAM_FULL, NULL, rel, forkNumber, - pg_prewarm_read_stream_next_block, + block_range_read_stream_cb, &p, 0); -- 2.45.2
From d5a743b4541d43abb5808696e72d50333a755ef7 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavuz81@gmail.com> Date: Mon, 26 Aug 2024 12:24:26 +0300 Subject: [PATCH v4 3/5] RelationCopyStorageUsingBuffer: Use generic-use read stream struct and callback Instead of creating another read stream struct and callback, use generic one that is exported from read stream. --- src/backend/storage/buffer/bufmgr.c | 35 ++++------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 5cdd2f10fc8..7dc800888e0 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -136,33 +136,6 @@ typedef struct SMgrSortArray SMgrRelation srel; } SMgrSortArray; -/* - * Helper struct for read stream object used in - * RelationCopyStorageUsingBuffer() function. - */ -struct copy_storage_using_buffer_read_stream_private -{ - BlockNumber blocknum; - BlockNumber nblocks; -}; - -/* - * Callback function to get next block for read stream object used in - * RelationCopyStorageUsingBuffer() function. - */ -static BlockNumber -copy_storage_using_buffer_read_stream_next_block(ReadStream *stream, - void *callback_private_data, - void *per_buffer_data) -{ - struct copy_storage_using_buffer_read_stream_private *p = callback_private_data; - - if (p->blocknum < p->nblocks) - return p->blocknum++; - - return InvalidBlockNumber; -} - /* GUC variables */ bool zero_damaged_pages = false; int bgwriter_lru_maxpages = 100; @@ -4710,7 +4683,7 @@ RelationCopyStorageUsingBuffer(RelFileLocator srclocator, PGIOAlignedBlock buf; BufferAccessStrategy bstrategy_src; BufferAccessStrategy bstrategy_dst; - struct copy_storage_using_buffer_read_stream_private p; + BlockRangeReadStreamPrivate p; ReadStream *src_stream; SMgrRelation src_smgr; @@ -4742,15 +4715,15 @@ RelationCopyStorageUsingBuffer(RelFileLocator srclocator, bstrategy_dst = GetAccessStrategy(BAS_BULKWRITE); /* Initalize streaming read */ - p.blocknum = 0; - p.nblocks = nblocks; + p.current_blocknum = 0; + p.last_exclusive = nblocks; src_smgr = smgropen(srclocator, INVALID_PROC_NUMBER); src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL, bstrategy_src, src_smgr, permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED, forkNum, - copy_storage_using_buffer_read_stream_next_block, + block_range_read_stream_cb, &p, 0); -- 2.45.2
From 03d07e5a20b8a1c74074861f8b16f2c88b48667a Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavuz81@gmail.com> Date: Mon, 26 Aug 2024 15:45:20 +0300 Subject: [PATCH v4 4/5] Use read stream in pg_visibility in collect_visibility_data() Instead of reading blocks with ReadBufferExtended() in collect_visibility_data(), use read stream. This change provides about 4% performance improvement. --- contrib/pg_visibility/pg_visibility.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 773ba92e454..087cd23d1b0 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -21,6 +21,7 @@ #include "storage/bufmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/read_stream.h" #include "storage/smgr.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -478,6 +479,8 @@ collect_visibility_data(Oid relid, bool include_pd) BlockNumber blkno; Buffer vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); + BlockRangeReadStreamPrivate p; + ReadStream *stream = NULL; rel = relation_open(relid, AccessShareLock); @@ -489,6 +492,20 @@ collect_visibility_data(Oid relid, bool include_pd) info->next = 0; info->count = nblocks; + /* Crate a read stream if read will be done. */ + if (include_pd) + { + p.current_blocknum = 0; + p.last_exclusive = nblocks; + stream = read_stream_begin_relation(READ_STREAM_FULL, + bstrategy, + rel, + MAIN_FORKNUM, + block_range_read_stream_cb, + &p, + 0); + } + for (blkno = 0; blkno < nblocks; ++blkno) { int32 mapbits; @@ -513,8 +530,7 @@ collect_visibility_data(Oid relid, bool include_pd) Buffer buffer; Page page; - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - bstrategy); + buffer = read_stream_next_buffer(stream, NULL); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); @@ -525,6 +541,12 @@ collect_visibility_data(Oid relid, bool include_pd) } } + if (include_pd && stream) + { + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); + } + /* Clean up. */ if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); -- 2.45.2
From d8017fb647a7438a7e061380e0859a62d7095ab7 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavuz81@gmail.com> Date: Fri, 23 Aug 2024 10:15:09 +0300 Subject: [PATCH v4 5/5] Use read stream in pg_visibility in collect_corrupt_items() Instead of reading blocks with ReadBufferExtended() in collect_corrupt_items(), use read stream. This change provides about 5% performance improvement. --- contrib/pg_visibility/pg_visibility.c | 87 ++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 15 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 087cd23d1b0..3b6ead83c45 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -42,6 +42,20 @@ typedef struct corrupt_items ItemPointer tids; } corrupt_items; +/* + * Helper struct for read stream object used in + * collect_corrupt_items() function. + */ +struct collect_corrupt_items_read_stream_private +{ + bool all_frozen; + bool all_visible; + BlockNumber current_blocknum; + BlockNumber last_exclusive; + Relation rel; + Buffer *vmbuffer; +}; + PG_FUNCTION_INFO_V1(pg_visibility_map); PG_FUNCTION_INFO_V1(pg_visibility_map_rel); PG_FUNCTION_INFO_V1(pg_visibility); @@ -632,6 +646,38 @@ GetStrictOldestNonRemovableTransactionId(Relation rel) } } +/* + * Callback function to get next block for read stream object used in + * collect_corrupt_items() function. + */ +static BlockNumber +collect_corrupt_items_read_stream_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + struct collect_corrupt_items_read_stream_private *p = callback_private_data; + + for (; p->current_blocknum < p->last_exclusive; p->current_blocknum++) + { + bool check_frozen = false; + bool check_visible = false; + + /* Make sure we are interruptible. */ + CHECK_FOR_INTERRUPTS(); + + if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->current_blocknum, p->vmbuffer)) + check_frozen = true; + if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->current_blocknum, p->vmbuffer)) + check_visible = true; + if (!check_visible && !check_frozen) + continue; + + return p->current_blocknum++; + } + + return InvalidBlockNumber; +} + /* * Returns a list of items whose visibility map information does not match * the status of the tuples on the page. @@ -654,8 +700,12 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) corrupt_items *items; BlockNumber blkno; Buffer vmbuffer = InvalidBuffer; + Buffer stream_vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); TransactionId OldestXmin = InvalidTransactionId; + struct collect_corrupt_items_read_stream_private p; + ReadStream *stream; + Buffer buffer; rel = relation_open(relid, AccessShareLock); @@ -680,12 +730,25 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) items->count = 64; items->tids = palloc(items->count * sizeof(ItemPointerData)); + p.current_blocknum = 0; + p.last_exclusive = nblocks; + p.rel = rel; + p.vmbuffer = &stream_vmbuffer; + p.all_frozen = all_frozen; + p.all_visible = all_visible; + stream = read_stream_begin_relation(READ_STREAM_FULL, + bstrategy, + rel, + MAIN_FORKNUM, + collect_corrupt_items_read_stream_next_block, + &p, + 0); + /* Loop over every block in the relation. */ - for (blkno = 0; blkno < nblocks; ++blkno) + while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer) { bool check_frozen = false; bool check_visible = false; - Buffer buffer; Page page; OffsetNumber offnum, maxoff; @@ -693,29 +756,19 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) /* Make sure we are interruptible. */ CHECK_FOR_INTERRUPTS(); - /* Use the visibility map to decide whether to check this page. */ - if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer)) - check_frozen = true; - if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) - check_visible = true; - if (!check_visible && !check_frozen) - continue; - - /* Read and lock the page. */ - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); maxoff = PageGetMaxOffsetNumber(page); + blkno = BufferGetBlockNumber(buffer); /* * The visibility map bits might have changed while we were acquiring * the page lock. Recheck to avoid returning spurious results. */ - if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer)) + if (all_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer)) check_frozen = false; - if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) + if (all_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) check_visible = false; if (!check_visible && !check_frozen) { @@ -800,10 +853,14 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) UnlockReleaseBuffer(buffer); } + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); /* Clean up. */ if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); + if (stream_vmbuffer != InvalidBuffer) + ReleaseBuffer(stream_vmbuffer); relation_close(rel, AccessShareLock); /* -- 2.45.2