On Wed, Mar 8, 2023 at 2:23 PM Andres Freund <and...@anarazel.de> wrote: > > On 2023-03-08 13:44:32 -0500, Melanie Plageman wrote: > > However, I am concerned that, while unlikely, this could be flakey. > > Something could happen to force all of those blocks out of shared > > buffers (even though they were just read in) before we hit them. > > You could make the test query a simple nested loop self-join, that'll prevent > the page being evicted, because it'll still be pinned on the outer side, while > generating hits on the inner side.
Good idea. v3 attached.
From e15bd63d662780898ef6fd584608acb13a12dbe1 Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Sat, 25 Feb 2023 14:37:25 -0500 Subject: [PATCH v3 1/2] Reorder pgstatfuncs-local enum --- src/backend/utils/adt/pgstatfuncs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index b61a12382b..1112bc2904 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1276,16 +1276,16 @@ pgstat_get_io_op_index(IOOp io_op) { case IOOP_EVICT: return IO_COL_EVICTIONS; + case IOOP_EXTEND: + return IO_COL_EXTENDS; + case IOOP_FSYNC: + return IO_COL_FSYNCS; case IOOP_READ: return IO_COL_READS; case IOOP_REUSE: return IO_COL_REUSES; case IOOP_WRITE: return IO_COL_WRITES; - case IOOP_EXTEND: - return IO_COL_EXTENDS; - case IOOP_FSYNC: - return IO_COL_FSYNCS; } elog(ERROR, "unrecognized IOOp value: %d", io_op); -- 2.37.2
From b648727abe315375fbf13d314fd3b9398a75b26f Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Sat, 25 Feb 2023 14:36:06 -0500 Subject: [PATCH v3 2/2] Track shared buffer hits in pg_stat_io Count new IOOP_HITs and add "hits" column to pg_stat_io. Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com> Discussion: https://www.postgresql.org/message-id/flat/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com --- doc/src/sgml/monitoring.sgml | 11 ++++++++ src/backend/catalog/system_views.sql | 1 + src/backend/storage/buffer/bufmgr.c | 38 ++++++++++---------------- src/backend/storage/buffer/localbuf.c | 11 ++------ src/backend/utils/activity/pgstat_io.c | 2 +- src/backend/utils/adt/pgstatfuncs.c | 3 ++ src/include/catalog/pg_proc.dat | 6 ++-- src/include/pgstat.h | 1 + src/include/storage/buf_internals.h | 2 +- src/test/regress/expected/rules.out | 3 +- src/test/regress/expected/stats.out | 34 +++++++++++++++++++++-- src/test/regress/sql/stats.sql | 21 ++++++++++++-- 12 files changed, 90 insertions(+), 43 deletions(-) diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 6249bb50d0..8b34ca60bc 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i </entry> </row> + <row> + <entry role="catalog_table_entry"> + <para role="column_definition"> + <structfield>hits</structfield> <type>bigint</type> + </para> + <para> + The number of times a desired block was found in a shared buffer. + </para> + </entry> + </row> + <row> <entry role="catalog_table_entry"> <para role="column_definition"> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 34ca0e739f..87bbbdfcb3 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1126,6 +1126,7 @@ SELECT b.writes, b.extends, b.op_bytes, + b.hits, b.evictions, b.reuses, b.fsyncs, diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 0a05577b68..05fd3c9a2a 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, - bool *foundPtr, IOContext *io_context); + bool *foundPtr, IOContext io_context); static void FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, IOContext io_context); static void FindAndDropRelationBuffers(RelFileLocator rlocator, @@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isLocalBuf) { /* - * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We - * do not use a BufferAccessStrategy for I/O of temporary tables. + * We do not use a BufferAccessStrategy for I/O of temporary tables. * However, in some cases, the "strategy" may not be NULL, so we can't * rely on IOContextForStrategy() to set the right IOContext for us. * This may happen in cases like CREATE TEMPORARY TABLE AS... */ - bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context); + io_context = IOCONTEXT_NORMAL; + io_object = IOOBJECT_TEMP_RELATION; + bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found); if (found) pgBufferUsage.local_blks_hit++; else if (isExtend) @@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * lookup the buffer. IO_IN_PROGRESS is set if the requested block is * not currently in memory. */ + io_context = IOContextForStrategy(strategy); + io_object = IOOBJECT_RELATION; bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum, - strategy, &found, &io_context); + strategy, &found, io_context); if (found) pgBufferUsage.shared_blks_hit++; else if (isExtend) @@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Just need to update stats before we exit */ *hit = true; VacuumPageHit++; + pgstat_count_io_op(io_object, io_context, IOOP_HIT); if (VacuumCostActive) VacuumCostBalance += VacuumCostPageHit; @@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */ - if (isLocalBuf) - { - bufBlock = LocalBufHdrGetBlock(bufHdr); - io_object = IOOBJECT_TEMP_RELATION; - } - else - { - bufBlock = BufHdrGetBlock(bufHdr); - io_object = IOOBJECT_RELATION; - } + bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); if (isExtend) { @@ -1139,7 +1134,7 @@ static BufferDesc * BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, BufferAccessStrategy strategy, - bool *foundPtr, IOContext *io_context) + bool *foundPtr, IOContext io_context) { bool from_ring; BufferTag newTag; /* identity of requested block */ @@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { /* * If we get here, previous attempts to read the buffer must - * have failed ... but we shall bravely try again. Set - * io_context since we will in fact need to count an IO - * Operation. + * have failed ... but we shall bravely try again. */ - *io_context = IOContextForStrategy(strategy); *foundPtr = false; } } @@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, */ LWLockRelease(newPartitionLock); - *io_context = IOContextForStrategy(strategy); - /* Loop here in case we have to try another victim buffer */ for (;;) { @@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, smgr->smgr_rlocator.locator.dbOid, smgr->smgr_rlocator.locator.relNumber); - FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context); + FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context); LWLockRelease(BufferDescriptorGetContentLock(buf)); ScheduleBufferTagForWriteback(&BackendWritebackContext, @@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * we may have been forced to release the buffer due to concurrent * pinners or erroring out. */ - pgstat_count_io_op(IOOBJECT_RELATION, *io_context, + pgstat_count_io_op(IOOBJECT_RELATION, io_context, from_ring ? IOOP_REUSE : IOOP_EVICT); } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 5325ddb663..880c9909b9 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, */ BufferDesc * LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, - bool *foundPtr, IOContext *io_context) + bool *foundPtr) { BufferTag newTag; /* identity of requested block */ LocalBufferLookupEnt *hresult; @@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, hresult = (LocalBufferLookupEnt *) hash_search(LocalBufHash, &newTag, HASH_FIND, NULL); - /* - * IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set - * io_context here (instead of after a buffer hit would have returned) for - * convenience since we don't have to worry about the overhead of calling - * IOContextForStrategy(). - */ - *io_context = IOCONTEXT_NORMAL; - if (hresult) { b = hresult->id; @@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum, buf_state &= ~BM_DIRTY; pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state); + /* Temporary table I/O does not use Buffer Access Strategies */ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE); pgBufferUsage.local_blks_written++; } diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index af5d554610..ae8bb34f78 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object, * Some BackendTypes will not do certain IOOps. */ if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && - (io_op == IOOP_READ || io_op == IOOP_EVICT)) + (io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT)) return false; if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 1112bc2904..e2e79eca99 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1258,6 +1258,7 @@ typedef enum io_stat_col IO_COL_WRITES, IO_COL_EXTENDS, IO_COL_CONVERSION, + IO_COL_HITS, IO_COL_EVICTIONS, IO_COL_REUSES, IO_COL_FSYNCS, @@ -1280,6 +1281,8 @@ pgstat_get_io_op_index(IOOp io_op) return IO_COL_EXTENDS; case IOOP_FSYNC: return IO_COL_FSYNCS; + case IOOP_HIT: + return IO_COL_HITS; case IOOP_READ: return IO_COL_READS; case IOOP_REUSE: diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 505595620e..615139c2f9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5721,9 +5721,9 @@ proname => 'pg_stat_get_io', provolatile => 'v', prorows => '30', proretset => 't', proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}', - proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}', + proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}', prosrc => 'pg_stat_get_io' }, { oid => '1136', descr => 'statistics: information about WAL activity', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index f43fac09ed..a7f77c17b3 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -304,6 +304,7 @@ typedef enum IOOp IOOP_EVICT, IOOP_EXTEND, IOOP_FSYNC, + IOOP_HIT, IOOP_READ, IOOP_REUSE, IOOP_WRITE, diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 0b44814740..2afb9bb309 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum); extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, - BlockNumber blockNum, bool *foundPtr, IOContext *io_context); + BlockNumber blockNum, bool *foundPtr); extern void MarkLocalBufferDirty(Buffer buffer); extern void DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index e953d1f515..c994da9f97 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1883,11 +1883,12 @@ pg_stat_io| SELECT backend_type, writes, extends, op_bytes, + hits, evictions, reuses, fsyncs, stats_reset - FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset); + FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset); pg_stat_progress_analyze| SELECT s.pid, s.datid, d.datname, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 186c296299..7d62ef5f17 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL); -- - writes of shared buffers to permanent storage -- - extends of relations using shared buffers -- - fsyncs done to ensure the durability of data dirtying shared buffers +-- - shared buffer hits -- There is no test for blocks evicted from shared buffers, because we cannot -- be sure of the state of shared buffers at the point the test is run. -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL @@ -1208,6 +1209,35 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; t (1 row) +SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +-- Select from the table again to count hits. +-- Ensure we generate hits by forcing a nested loop join with no materialize +-- node. The outer side buffer will stay pinned, preventing its eviction, while +-- we loop through the inner side and generate hits. +SET enable_nestloop TO on; SET enable_mergejoin TO off; SET enable_hashjoin TO off; +SET enable_material TO off; +SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); + count +------- + 100 +(1 row) + +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT sum(hits) AS io_sum_shared_after_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits; + ?column? +---------- + t +(1 row) + +RESET enable_mergejoin; RESET enable_hashjoin; RESET enable_material; DROP TABLE test_io_shared; -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: -- - eviction of local buffers in order to reuse them @@ -1342,7 +1372,7 @@ SELECT pg_stat_have_stats('io', 0, 0); t (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); pg_stat_reset_shared @@ -1350,7 +1380,7 @@ SELECT pg_stat_reset_shared('io'); (1 row) -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset; ?column? diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index d7f873cfc9..495e48990d 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL); -- - writes of shared buffers to permanent storage -- - extends of relations using shared buffers -- - fsyncs done to ensure the durability of data dirtying shared buffers +-- - shared buffer hits -- There is no test for blocks evicted from shared buffers, because we cannot -- be sure of the state of shared buffers at the point the test is run. @@ -588,6 +589,22 @@ SELECT pg_stat_force_next_flush(); SELECT sum(reads) AS io_sum_shared_after_reads FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads; + +SELECT sum(hits) AS io_sum_shared_before_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +-- Select from the table again to count hits. +-- Ensure we generate hits by forcing a nested loop join with no materialize +-- node. The outer side buffer will stay pinned, preventing its eviction, while +-- we loop through the inner side and generate hits. +SET enable_nestloop TO on; SET enable_mergejoin TO off; SET enable_hashjoin TO off; +SET enable_material TO off; +SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a); +SELECT pg_stat_force_next_flush(); +SELECT sum(hits) AS io_sum_shared_after_hits + FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset +SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits; +RESET enable_mergejoin; RESET enable_hashjoin; RESET enable_material; + DROP TABLE test_io_shared; -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io: @@ -675,10 +692,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext -- Test IO stats reset SELECT pg_stat_have_stats('io', 0, 0); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset FROM pg_stat_io \gset SELECT pg_stat_reset_shared('io'); -SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset +SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset FROM pg_stat_io \gset SELECT :io_stats_post_reset < :io_stats_pre_reset; -- 2.37.2