Hi, I am working on using the read stream in pg_visibility. There are two places to use it:
1- collect_visibility_data() This one is straightforward. I created a read stream object if 'include_pd' is true because read I/O is done when 'include_pd' is true. There is ~4% timing improvement with this change. I started the server with the default settings and created a 6 GB table. Then run 100 times pg_visibility() by clearing the OS cache between each run. ---------- 2- collect_corrupt_items() This one is more complicated. The read stream callback function loops until it finds a suitable block to read. So, if the callback returns an InvalidBlockNumber; it means that the stream processed all possible blocks and the stream should be finished. There is ~3% timing improvement with this change. I started the server with the default settings and created a 6 GB table. Then run 100 times pg_check_visible() by clearing the OS cache between each run. The downside of this approach is there are too many "vmbuffer is valid but BufferGetBlockNumber(*vmbuffer) is not equal to mapBlock, so vmbuffer needs to be read again" cases in the read stream version (700 vs 20 for the 6 GB table). This is caused by the callback function of the read stream reading a new vmbuf while getting next block numbers. So, vmbuf is wrong when we are checking visibility map bits that might have changed while we were acquiring the page lock. ---------- Both patches are attached. Any feedback would be appreciated. -- Regards, Nazir Bilal Yavuz Microsoft
From c178e010759f042dc089fd4c5b2283a04b95f017 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Fri, 9 Aug 2024 11:41:52 +0300 Subject: [PATCH v1 1/2] Use read stream in pg_visibility in collect_visibility_data() Instead of reading blocks with ReadBufferExtended() in collect_visibility_data(), use read stream. This change provides about 4% performance improvement. --- contrib/pg_visibility/pg_visibility.c | 53 ++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 1a1a4ff7be7..10f961c58f0 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -21,6 +21,7 @@ #include "storage/bufmgr.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "storage/read_stream.h" #include "storage/smgr.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -41,6 +42,16 @@ typedef struct corrupt_items ItemPointer tids; } corrupt_items; +/* + * Helper struct for read stream object used in + * collect_visibility_data() function. + */ +struct collect_visibility_data_read_stream_private +{ + BlockNumber blocknum; + BlockNumber nblocks; +}; + PG_FUNCTION_INFO_V1(pg_visibility_map); PG_FUNCTION_INFO_V1(pg_visibility_map_rel); PG_FUNCTION_INFO_V1(pg_visibility); @@ -463,6 +474,23 @@ pg_visibility_tupdesc(bool include_blkno, bool include_pd) return BlessTupleDesc(tupdesc); } +/* + * Callback function to get next block for read stream object used in + * collect_visibility_data() function. + */ +static BlockNumber +collect_visibility_data_read_stream_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + struct collect_visibility_data_read_stream_private *p = callback_private_data; + + if (p->blocknum < p->nblocks) + return p->blocknum++; + + return InvalidBlockNumber; +} + /* * Collect visibility data about a relation. * @@ -478,6 +506,8 @@ collect_visibility_data(Oid relid, bool include_pd) BlockNumber blkno; Buffer vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); + struct collect_visibility_data_read_stream_private p; + ReadStream *stream = NULL; rel = relation_open(relid, AccessShareLock); @@ -489,6 +519,20 @@ collect_visibility_data(Oid relid, bool include_pd) info->next = 0; info->count = nblocks; + /* Crate a read stream if read will be done. */ + if (include_pd) + { + p.blocknum = 0; + p.nblocks = nblocks; + stream = read_stream_begin_relation(READ_STREAM_FULL, + bstrategy, + rel, + MAIN_FORKNUM, + collect_visibility_data_read_stream_next_block, + &p, + 0); + } + for (blkno = 0; blkno < nblocks; ++blkno) { int32 mapbits; @@ -513,8 +557,7 @@ collect_visibility_data(Oid relid, bool include_pd) Buffer buffer; Page page; - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - bstrategy); + buffer = read_stream_next_buffer(stream, NULL); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); @@ -525,6 +568,12 @@ collect_visibility_data(Oid relid, bool include_pd) } } + if (include_pd && stream) + { + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); + } + /* Clean up. */ if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); -- 2.45.2
From 456d1df641fd6e81658fd9288e1f81b9ce068708 Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz <byavu...@gmail.com> Date: Tue, 13 Aug 2024 13:03:39 +0300 Subject: [PATCH v1 2/2] Use read stream in pg_visibility in collect_corrupt_items() Instead of reading blocks with ReadBufferExtended() in collect_corrupt_items(), use read stream. This change provides about 3% performance improvement. --- contrib/pg_visibility/pg_visibility.c | 93 +++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 12 deletions(-) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 10f961c58f0..4729ee95991 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -52,6 +52,20 @@ struct collect_visibility_data_read_stream_private BlockNumber nblocks; }; +/* + * Helper struct for read stream object used in + * collect_corrupt_items() function. + */ +struct collect_corrupt_items_read_stream_private +{ + bool all_frozen; + bool all_visible; + BlockNumber blocknum; + BlockNumber nblocks; + Relation rel; + Buffer *vmbuffer; +}; + PG_FUNCTION_INFO_V1(pg_visibility_map); PG_FUNCTION_INFO_V1(pg_visibility_map_rel); PG_FUNCTION_INFO_V1(pg_visibility); @@ -639,6 +653,35 @@ GetStrictOldestNonRemovableTransactionId(Relation rel) } } +/* + * Callback function to get next block for read stream object used in + * collect_corrupt_items() function. + */ +static BlockNumber +collect_corrupt_items_read_stream_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) +{ + struct collect_corrupt_items_read_stream_private *p = callback_private_data; + + for (; p->blocknum < p->nblocks; p->blocknum++) + { + bool check_frozen = false; + bool check_visible = false; + + if (p->all_frozen && VM_ALL_FROZEN(p->rel, p->blocknum, p->vmbuffer)) + check_frozen = true; + if (p->all_visible && VM_ALL_VISIBLE(p->rel, p->blocknum, p->vmbuffer)) + check_visible = true; + if (!check_visible && !check_frozen) + continue; + + return p->blocknum++; + } + + return InvalidBlockNumber; +} + /* * Returns a list of items whose visibility map information does not match * the status of the tuples on the page. @@ -663,6 +706,10 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) Buffer vmbuffer = InvalidBuffer; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); TransactionId OldestXmin = InvalidTransactionId; + struct collect_corrupt_items_read_stream_private p; + ReadStream *stream; + + Assert(all_visible || all_frozen); rel = relation_open(relid, AccessShareLock); @@ -687,6 +734,20 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) items->count = 64; items->tids = palloc(items->count * sizeof(ItemPointerData)); + p.blocknum = 0; + p.nblocks = nblocks; + p.rel = rel; + p.vmbuffer = &vmbuffer; + p.all_frozen = all_frozen; + p.all_visible = all_visible; + stream = read_stream_begin_relation(READ_STREAM_FULL, + bstrategy, + rel, + MAIN_FORKNUM, + collect_corrupt_items_read_stream_next_block, + &p, + 0); + /* Loop over every block in the relation. */ for (blkno = 0; blkno < nblocks; ++blkno) { @@ -700,17 +761,20 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) /* Make sure we are interruptible. */ CHECK_FOR_INTERRUPTS(); - /* Use the visibility map to decide whether to check this page. */ - if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer)) - check_frozen = true; - if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) - check_visible = true; - if (!check_visible && !check_frozen) - continue; - /* Read and lock the page. */ - buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - bstrategy); + buffer = read_stream_next_buffer(stream, NULL); + + /* + * If the read stream returns an InvalidBuffer, this means all the + * blocks are processed. So, end the stream and loop. + */ + if (buffer == InvalidBuffer) + { + read_stream_end(stream); + stream = NULL; + break; + } + LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); @@ -720,9 +784,9 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) * The visibility map bits might have changed while we were acquiring * the page lock. Recheck to avoid returning spurious results. */ - if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer)) + if (all_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer)) check_frozen = false; - if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) + if (all_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) check_visible = false; if (!check_visible && !check_frozen) { @@ -807,6 +871,11 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) UnlockReleaseBuffer(buffer); } + if (stream) + { + Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer); + read_stream_end(stream); + } /* Clean up. */ if (vmbuffer != InvalidBuffer) -- 2.45.2