On Wed, Aug 11, 2021 at 4:11 PM Melanie Plageman <melanieplage...@gmail.com> wrote: > > On Tue, Aug 3, 2021 at 2:13 PM Andres Freund <and...@anarazel.de> wrote: > > > > > @@ -2895,6 +2948,20 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln) > > > /* > > > * bufToWrite is either the shared buffer or a copy, as appropriate. > > > */ > > > + > > > + /* > > > + * TODO: consider that if we did not need to distinguish between a > > > buffer > > > + * flushed that was grabbed from the ring buffer and written out as > > > part > > > + * of a strategy which was not from main Shared Buffers (and thus > > > + * preventable by bgwriter or checkpointer), then we could move all > > > calls > > > + * to pgstat_increment_buffer_action() here except for the one for > > > + * extends, which would remain in ReadBuffer_common() before > > > smgrextend() > > > + * (unless we decide to start counting other extends). That > > > includes the > > > + * call to count buffers written by bgwriter and checkpointer which > > > go > > > + * through FlushBuffer() but not BufferAlloc(). That would make it > > > + * simpler. Perhaps instead we can find somewhere else to indicate > > > that > > > + * the buffer is from the ring of buffers to reuse. > > > + */ > > > smgrwrite(reln, > > > buf->tag.forkNum, > > > buf->tag.blockNum, > > > > Can we just add a parameter to FlushBuffer indicating what the source of the > > write is? > > > > I just noticed this comment now, so I'll address that in the next > version. I rebased today and noticed merge conflicts, so, it looks like > v5 will be on its way soon anyway. >
Actually, after moving the code around like you suggested, calling pgstat_increment_buffer_action() before smgrwrite() in FlushBuffer() and using a parameter to indicate if it is a strategy write or not would only save us one other call to pgstat_increment_buffer_action() -- the one in SyncOneBuffer(). We would end up moving the one in BufferAlloc() to FlushBuffer() and removing the one in SyncOneBuffer(). Do you think it is still worth it? Rebased v5 attached.
From c5feb44585e1e073927c8d45230aa2eabc178c7e Mon Sep 17 00:00:00 2001 From: Melanie Plageman <melanieplage...@gmail.com> Date: Wed, 11 Aug 2021 17:49:08 -0400 Subject: [PATCH v5] Add system view tracking shared buffer actions Add a system view which tracks - number of shared buffers the checkpointer and bgwriter write out - number of shared buffers a regular backend is forced to flush - number of extends done by a regular backend through shared buffers - number of buffers flushed by a backend or autovacuum using a BufferAccessStrategy which, were they not to use this strategy, could perhaps have been avoided if a clean shared buffer was available - number of fsyncs done by a backend which could have been done by checkpointer if sync queue had not been full - number of buffers allocated by a regular backend or autovacuum worker for either a new block or an existing block of a relation which is not currently in a buffer All of these stats which were in the system view pg_stat_bgwriter have been removed from that view. All backends, on exit, will update a shared memory array with the buffers they wrote or extended. When the view is queried, add all live backend's statuses to the totals in the shared memory array and return that as the full total. Each row of the view is for a particular backend type and each column is the number of a particular kind of buffer action taken by the various backends. TODO: - Some kind of test? - Docs change --- src/backend/catalog/system_views.sql | 13 ++- src/backend/postmaster/checkpointer.c | 27 +------ src/backend/postmaster/pgstat.c | 43 +++++++++- src/backend/storage/buffer/bufmgr.c | 30 +++++-- src/backend/storage/buffer/freelist.c | 17 +++- src/backend/utils/activity/backend_status.c | 57 +++++++++++++ src/backend/utils/adt/pgstatfuncs.c | 90 ++++++++++++++++++--- src/backend/utils/init/miscinit.c | 2 + src/include/catalog/pg_proc.dat | 22 +++-- src/include/miscadmin.h | 13 +++ src/include/pgstat.h | 24 ++++-- src/include/storage/buf_internals.h | 4 +- src/include/utils/backend_status.h | 14 ++++ src/test/regress/expected/rules.out | 10 ++- src/test/regress/sql/stats.sql | 1 + 15 files changed, 293 insertions(+), 74 deletions(-) diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 55f6e3711d..f51e7938fc 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1067,11 +1067,18 @@ CREATE VIEW pg_stat_bgwriter AS pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, - pg_stat_get_buf_written_backend() AS buffers_backend, - pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync, - pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +CREATE VIEW pg_stat_buffer_actions AS +SELECT + b.backend_type, + b.buffers_alloc, + b.buffers_extend, + b.buffers_fsync, + b.buffers_write, + b.buffers_write_strat +FROM pg_stat_get_buffer_actions() b; + CREATE VIEW pg_stat_wal AS SELECT w.wal_records, diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index be7366379d..fca78fa4ef 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -90,17 +90,9 @@ * requesting backends since the last checkpoint start. The flags are * chosen so that OR'ing is the correct way to combine multiple requests. * - * num_backend_writes is used to count the number of buffer writes performed - * by user backend processes. This counter should be wide enough that it - * can't overflow during a single processing cycle. num_backend_fsync - * counts the subset of those writes that also had to do their own fsync, - * because the checkpointer failed to absorb their request. - * * The requests array holds fsync requests sent by backends and not yet * absorbed by the checkpointer. * - * Unlike the checkpoint fields, num_backend_writes, num_backend_fsync, and - * the requests fields are protected by CheckpointerCommLock. *---------- */ typedef struct @@ -124,9 +116,6 @@ typedef struct ConditionVariable start_cv; /* signaled when ckpt_started advances */ ConditionVariable done_cv; /* signaled when ckpt_done advances */ - uint32 num_backend_writes; /* counts user backend buffer writes */ - uint32 num_backend_fsync; /* counts user backend fsync calls */ - int num_requests; /* current # of requests */ int max_requests; /* allocated array size */ CheckpointerRequest requests[FLEXIBLE_ARRAY_MEMBER]; @@ -1085,10 +1074,6 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); - /* Count all backend writes regardless of if they fit in the queue */ - if (!AmBackgroundWriterProcess()) - CheckpointerShmem->num_backend_writes++; - /* * If the checkpointer isn't running or the request queue is full, the * backend will have to perform its own fsync request. But before forcing @@ -1102,8 +1087,7 @@ ForwardSyncRequest(const FileTag *ftag, SyncRequestType type) * Count the subset of writes where backends have to do their own * fsync */ - if (!AmBackgroundWriterProcess()) - CheckpointerShmem->num_backend_fsync++; + pgstat_increment_buffer_action(BA_Fsync); LWLockRelease(CheckpointerCommLock); return false; } @@ -1260,15 +1244,6 @@ AbsorbSyncRequests(void) LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE); - /* Transfer stats counts into pending pgstats message */ - PendingCheckpointerStats.m_buf_written_backend - += CheckpointerShmem->num_backend_writes; - PendingCheckpointerStats.m_buf_fsync_backend - += CheckpointerShmem->num_backend_fsync; - - CheckpointerShmem->num_backend_writes = 0; - CheckpointerShmem->num_backend_fsync = 0; - /* * We try to avoid holding the lock for a long time by copying the request * array, and processing the requests after releasing the lock. diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 1b54ef74eb..04a6fec18e 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -70,6 +70,7 @@ #include "utils/rel.h" #include "utils/snapmgr.h" #include "utils/timestamp.h" +#include "utils/backend_status.h" /* ---------- * Timer definitions. @@ -128,6 +129,7 @@ char *pgstat_stat_tmpname = NULL; * Stored directly in a stats message structure so they can be sent * without needing to copy things around. We assume these init to zeroes. */ +PgStat_MsgBufferActions BufferActionsStats; PgStat_MsgBgWriter PendingBgWriterStats; PgStat_MsgCheckpointer PendingCheckpointerStats; PgStat_MsgWal WalStats; @@ -360,6 +362,7 @@ static void pgstat_recv_anl_ancestors(PgStat_MsgAnlAncestors *msg, int len); static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len); static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); static void pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len); +static void pgstat_recv_buffer_actions(PgStat_MsgBufferActions *msg, int len); static void pgstat_recv_wal(PgStat_MsgWal *msg, int len); static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len); static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); @@ -866,6 +869,8 @@ pgstat_report_stat(bool disconnect) pgstat_assert_is_up(); + + /* * Don't expend a clock check if nothing to do. * @@ -971,6 +976,10 @@ pgstat_report_stat(bool disconnect) /* Now, send function statistics */ pgstat_send_funcstats(); + // TODO: not really sure if this is the right place to do this + if (pgstat_record_dying_backend_buffer_actions()) + pgstat_send_buffer_actions(); + /* Send WAL statistics */ pgstat_send_wal(true); @@ -3137,6 +3146,16 @@ pgstat_send_checkpointer(void) MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats)); } +void +pgstat_send_buffer_actions(void) +{ + pgstat_setheader(&BufferActionsStats.m_hdr, PGSTAT_MTYPE_BUFFER_ACTIONS); + pgstat_send(&BufferActionsStats, sizeof(BufferActionsStats)); + + // TODO: not needed because backends only call this when exiting? + MemSet(&BufferActionsStats, 0, sizeof(BufferActionsStats)); +} + /* ---------- * pgstat_send_wal() - * @@ -3483,6 +3502,10 @@ PgstatCollectorMain(int argc, char *argv[]) pgstat_recv_checkpointer(&msg.msg_checkpointer, len); break; + case PGSTAT_MTYPE_BUFFER_ACTIONS: + pgstat_recv_buffer_actions(&msg.msg_buffer_actions, len); + break; + case PGSTAT_MTYPE_WAL: pgstat_recv_wal(&msg.msg_wal, len); break; @@ -5465,7 +5488,6 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len) { globalStats.bgwriter.buf_written_clean += msg->m_buf_written_clean; globalStats.bgwriter.maxwritten_clean += msg->m_maxwritten_clean; - globalStats.bgwriter.buf_alloc += msg->m_buf_alloc; } /* ---------- @@ -5482,8 +5504,23 @@ pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len) globalStats.checkpointer.checkpoint_write_time += msg->m_checkpoint_write_time; globalStats.checkpointer.checkpoint_sync_time += msg->m_checkpoint_sync_time; globalStats.checkpointer.buf_written_checkpoints += msg->m_buf_written_checkpoints; - globalStats.checkpointer.buf_written_backend += msg->m_buf_written_backend; - globalStats.checkpointer.buf_fsync_backend += msg->m_buf_fsync_backend; +} + +static void +pgstat_recv_buffer_actions(PgStat_MsgBufferActions *msg, int len) +{ + globalStats.buffer_actions[msg->backend_type].backend_type = msg->backend_type; + globalStats.buffer_actions[msg->backend_type].allocs += msg->allocs; + globalStats.buffer_actions[msg->backend_type].extends += msg->extends; + globalStats.buffer_actions[msg->backend_type].fsyncs += msg->fsyncs; + globalStats.buffer_actions[msg->backend_type].writes += msg->writes; + globalStats.buffer_actions[msg->backend_type].writes_strat += msg->writes_strat; +} + +PgStat_MsgBufferActions * +pgstat_get_buffer_action_stats(BackendType backend_type) +{ + return &globalStats.buffer_actions[backend_type]; } /* ---------- diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 3b485de067..74f88a918e 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -963,6 +963,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (isExtend) { + /* + * Extends counted here are only those that go through shared buffers + */ + pgstat_increment_buffer_action(BA_Extend); /* new buffers are zero-filled */ MemSet((char *) bufBlock, 0, BLCKSZ); /* don't set checksum for all-zero page */ @@ -1163,6 +1167,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, /* Loop here in case we have to try another victim buffer */ for (;;) { + bool from_ring = false; /* * Ensure, while the spinlock's not yet held, that there's a free * refcount entry. @@ -1173,7 +1178,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * Select a victim buffer. The buffer is returned with its header * spinlock still held! */ - buf = StrategyGetBuffer(strategy, &buf_state); + buf = StrategyGetBuffer(strategy, &buf_state, &from_ring); Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0); @@ -1210,6 +1215,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED)) { + BufferActionType buffer_action; /* * If using a nondefault strategy, and writing the buffer * would require a WAL flush, let the strategy decide whether @@ -1227,7 +1233,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, UnlockBufHdr(buf, buf_state); if (XLogNeedsFlush(lsn) && - StrategyRejectBuffer(strategy, buf)) + StrategyRejectBuffer(strategy, buf, &from_ring)) { /* Drop lock/pin and loop around for another buffer */ LWLockRelease(BufferDescriptorGetContentLock(buf)); @@ -1236,6 +1242,21 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, } } + /* + * When a strategy is in use, if the dirty buffer was selected + * from the strategy ring and we did not bother checking the + * freelist or doing a clock sweep to look for a clean shared + * buffer to use, the write will be counted as a strategy + * write. However, if the dirty buffer was obtained from the + * freelist or a clock sweep, it is counted as a regular write. + * When a strategy is not in use, at this point, the write can + * only be a "regular" write of a dirty buffer. + */ + + buffer_action = from_ring ? BA_Write_Strat : BA_Write; + pgstat_increment_buffer_action(buffer_action); + + /* OK, do the I/O */ TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_START(forkNum, blockNum, smgr->smgr_rnode.node.spcNode, @@ -2246,9 +2267,6 @@ BgBufferSync(WritebackContext *wb_context) */ strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc); - /* Report buffer alloc counts to pgstat */ - PendingBgWriterStats.m_buf_alloc += recent_alloc; - /* * If we're not running the LRU scan, just stop after doing the stats * stuff. We mark the saved state invalid so that we can recover sanely @@ -2543,6 +2561,8 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) * Pin it, share-lock it, write it. (FlushBuffer will do nothing if the * buffer is clean by the time we've locked it.) */ + + pgstat_increment_buffer_action(BA_Write); PinBuffer_Locked(bufHdr); LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED); diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 6be80476db..20bba546fe 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -19,6 +19,7 @@ #include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/proc.h" +#include "utils/backend_status.h" #define INT_ACCESS_ONCE(var) ((int)(*((volatile int *)&(var)))) @@ -198,7 +199,7 @@ have_free_buffer(void) * return the buffer with the buffer header spinlock still held. */ BufferDesc * -StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) +StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state, bool *from_ring) { BufferDesc *buf; int bgwprocno; @@ -213,7 +214,10 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) { buf = GetBufferFromRing(strategy, buf_state); if (buf != NULL) + { + *from_ring = true; return buf; + } } /* @@ -247,6 +251,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state) * the rate of buffer consumption. Note that buffers recycled by a * strategy object are intentionally not counted here. */ + pgstat_increment_buffer_action(BA_Alloc); pg_atomic_fetch_add_u32(&StrategyControl->numBufferAllocs, 1); /* @@ -683,7 +688,7 @@ AddBufferToRing(BufferAccessStrategy strategy, BufferDesc *buf) * if this buffer should be written and re-used. */ bool -StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf) +StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool *from_ring) { /* We only do this in bulkread mode */ if (strategy->btype != BAS_BULKREAD) @@ -700,5 +705,13 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf) */ strategy->buffers[strategy->current] = InvalidBuffer; + /* + * Since we will not be writing out a dirty buffer from the ring, set + * from_ring to false so that we do not count this write as a "strategy + * write" and can do proper bookkeeping for pg_stat_buffer_actions. + */ + *from_ring = false; + + return true; } diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c index 2901f9f5a9..b2dbc4dc28 100644 --- a/src/backend/utils/activity/backend_status.c +++ b/src/backend/utils/activity/backend_status.c @@ -399,6 +399,11 @@ pgstat_bestart(void) lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID; lbeentry.st_progress_command_target = InvalidOid; lbeentry.st_query_id = UINT64CONST(0); + pg_atomic_init_u64(&lbeentry.buffer_action_stats.allocs, 0); + pg_atomic_init_u64(&lbeentry.buffer_action_stats.extends, 0); + pg_atomic_init_u64(&lbeentry.buffer_action_stats.fsyncs, 0); + pg_atomic_init_u64(&lbeentry.buffer_action_stats.writes, 0); + pg_atomic_init_u64(&lbeentry.buffer_action_stats.writes_strat, 0); /* * we don't zero st_progress_param here to save cycles; nobody should @@ -459,6 +464,7 @@ pgstat_beshutdown_hook(int code, Datum arg) { volatile PgBackendStatus *beentry = MyBEEntry; + /* * Clear my status entry, following the protocol of bumping st_changecount * before and after. We use a volatile pointer here to ensure the @@ -1042,6 +1048,57 @@ pgstat_get_my_query_id(void) return MyBEEntry->st_query_id; } +void +pgstat_increment_buffer_action(BufferActionType ba_type) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!beentry || !pgstat_track_activities) + return; + + if (ba_type == BA_Alloc) + pg_atomic_write_u64(&beentry->buffer_action_stats.allocs, pg_atomic_read_u64(&beentry->buffer_action_stats.allocs) + 1); + else if (ba_type == BA_Extend) + pg_atomic_write_u64(&beentry->buffer_action_stats.extends, pg_atomic_read_u64(&beentry->buffer_action_stats.extends) + 1); + else if (ba_type == BA_Fsync) + pg_atomic_write_u64(&beentry->buffer_action_stats.fsyncs, pg_atomic_read_u64(&beentry->buffer_action_stats.fsyncs) + 1); + else if (ba_type == BA_Write) + pg_atomic_write_u64(&beentry->buffer_action_stats.writes, pg_atomic_read_u64(&beentry->buffer_action_stats.writes) + 1); + else if (ba_type == BA_Write_Strat) + pg_atomic_write_u64(&beentry->buffer_action_stats.writes_strat, pg_atomic_read_u64(&beentry->buffer_action_stats.writes_strat) + 1); + +} + +/* + * Called for a single backend at the time of death to persist its I/O stats + */ +bool +pgstat_record_dying_backend_buffer_actions(void) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + if (!beentry) + return false; + + // TODO: should I add this or just set it -- seems like it would only happen once - + BufferActionsStats.backend_type = beentry->st_backendType; + BufferActionsStats.allocs = pg_atomic_read_u64(&beentry->buffer_action_stats.allocs); + BufferActionsStats.extends = pg_atomic_read_u64(&beentry->buffer_action_stats.extends); + BufferActionsStats.fsyncs = pg_atomic_read_u64(&beentry->buffer_action_stats.fsyncs); + BufferActionsStats.writes = pg_atomic_read_u64(&beentry->buffer_action_stats.writes); + BufferActionsStats.writes_strat = pg_atomic_read_u64(&beentry->buffer_action_stats.writes_strat); + return true; +} + +// TODO: this is clearly no good, but I'm not sure if I have to/want to/can use +// the below pgstat_fetch_stat_beentry and doing the loop that is in +// pg_stat_get_buffer_actions() into this file will likely mean having to pass a +// two-dimensional array as a parameter which is unappealing to me +volatile PgBackendStatus * +pgstat_access_backend_status_array(void) +{ + return BackendStatusArray; +} + /* ---------- * pgstat_fetch_stat_beentry() - diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index ff5aedc99c..17d3fa942d 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1779,21 +1779,87 @@ pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS) } Datum -pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS) +pg_stat_get_buffer_actions(PG_FUNCTION_ARGS) { - PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_written_backend); -} + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + Tuplestorestate *tupstore; + MemoryContext per_query_ctx; + MemoryContext oldcontext; + PgStat_MsgBufferActions *buffer_actions; + int i; + volatile PgBackendStatus *beentry; + Datum all_values[BACKEND_NUM_TYPES][BUFFER_ACTION_NUM_TYPES]; + bool all_nulls[BACKEND_NUM_TYPES][BUFFER_ACTION_NUM_TYPES]; -Datum -pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS) -{ - PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_fsync_backend); -} + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; + oldcontext = MemoryContextSwitchTo(per_query_ctx); -Datum -pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) -{ - PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc); + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + pgstat_fetch_global(); + for (i = 1; i < BACKEND_NUM_TYPES; i++) + { + Datum *values = all_values[i]; + bool *nulls = all_nulls[i]; + + MemSet(values, 0, sizeof(Datum[BUFFER_ACTION_NUM_TYPES])); + MemSet(nulls, 0, sizeof(Datum[BUFFER_ACTION_NUM_TYPES])); + + values[0] = CStringGetTextDatum(GetBackendTypeDesc(i)); + /* + * Add stats from all exited backends + */ + buffer_actions = pgstat_get_buffer_action_stats(i); + + values[BA_Alloc] += buffer_actions->allocs; + values[BA_Extend] += buffer_actions->extends; + values[BA_Fsync] += buffer_actions->fsyncs; + values[BA_Write] += buffer_actions->writes; + values[BA_Write_Strat] += buffer_actions->writes_strat; + } + + /* + * Loop through all live backends and count their buffer actions + */ + + beentry = pgstat_access_backend_status_array(); + for (i = 0; i <= MaxBackends; i++) + { + Datum *values; + beentry++; + /* Don't count dead backends. They should already be counted */ + if (beentry->st_procpid == 0) + continue; + values = all_values[beentry->st_backendType]; + + + values[BA_Alloc] += pg_atomic_read_u64(&beentry->buffer_action_stats.allocs); + values[BA_Extend] += pg_atomic_read_u64(&beentry->buffer_action_stats.extends); + values[BA_Fsync] += pg_atomic_read_u64(&beentry->buffer_action_stats.fsyncs); + values[BA_Write] += pg_atomic_read_u64(&beentry->buffer_action_stats.writes); + values[BA_Write_Strat] += pg_atomic_read_u64(&beentry->buffer_action_stats.writes_strat); + + } + + for (i = 1; i < BACKEND_NUM_TYPES; i++) + { + Datum *values = all_values[i]; + bool *nulls = all_nulls[i]; + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + } + + /* clean up and return the tuplestore */ + tuplestore_donestoring(tupstore); + + return (Datum) 0; } /* diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 88801374b5..cbeaa9ab94 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -294,6 +294,8 @@ GetBackendTypeDesc(BackendType backendType) case B_LOGGER: backendDesc = "logger"; break; + case BACKEND_NUM_TYPES: + break; } return backendDesc; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b603700ed9..1f1a97ba48 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5629,18 +5629,16 @@ proname => 'pg_stat_get_checkpoint_sync_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => '', prosrc => 'pg_stat_get_checkpoint_sync_time' }, -{ oid => '2775', descr => 'statistics: number of buffers written by backends', - proname => 'pg_stat_get_buf_written_backend', provolatile => 's', - proparallel => 'r', prorettype => 'int8', proargtypes => '', - prosrc => 'pg_stat_get_buf_written_backend' }, -{ oid => '3063', - descr => 'statistics: number of backend buffer writes that did their own fsync', - proname => 'pg_stat_get_buf_fsync_backend', provolatile => 's', - proparallel => 'r', prorettype => 'int8', proargtypes => '', - prosrc => 'pg_stat_get_buf_fsync_backend' }, -{ oid => '2859', descr => 'statistics: number of buffer allocations', - proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r', - prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' }, + + +{ oid => '8459', descr => 'statistics: counts of buffer actions taken by each backend type', + proname => 'pg_stat_get_buffer_actions', provolatile => 's', proisstrict => 'f', + prorows => '13', proretset => 't', + proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{text,int8,int8,int8,int8,int8}', + proargmodes => '{o,o,o,o,o,o}', + proargnames => '{backend_type,buffers_alloc,buffers_extend,buffers_fsync,buffers_write,buffers_write_strat}', + prosrc => 'pg_stat_get_buffer_actions' }, { oid => '1136', descr => 'statistics: information about WAL activity', proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 2e2e9a364a..03d5e464a9 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -336,8 +336,21 @@ typedef enum BackendType B_ARCHIVER, B_STATS_COLLECTOR, B_LOGGER, + BACKEND_NUM_TYPES, } BackendType; +typedef enum BufferActionType +{ + BA_Invalid = 0, + BA_Alloc, + BA_Extend, + BA_Fsync, + BA_Write, + BA_Write_Strat, + BUFFER_ACTION_NUM_TYPES, +} BufferActionType; + + extern BackendType MyBackendType; extern const char *GetBackendTypeDesc(BackendType backendType); diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 2068a68a5f..1948c0bc04 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -73,6 +73,7 @@ typedef enum StatMsgType PGSTAT_MTYPE_ARCHIVER, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_CHECKPOINTER, + PGSTAT_MTYPE_BUFFER_ACTIONS, PGSTAT_MTYPE_WAL, PGSTAT_MTYPE_SLRU, PGSTAT_MTYPE_FUNCSTAT, @@ -473,7 +474,6 @@ typedef struct PgStat_MsgBgWriter PgStat_Counter m_buf_written_clean; PgStat_Counter m_maxwritten_clean; - PgStat_Counter m_buf_alloc; } PgStat_MsgBgWriter; /* ---------- @@ -487,12 +487,22 @@ typedef struct PgStat_MsgCheckpointer PgStat_Counter m_timed_checkpoints; PgStat_Counter m_requested_checkpoints; PgStat_Counter m_buf_written_checkpoints; - PgStat_Counter m_buf_written_backend; - PgStat_Counter m_buf_fsync_backend; PgStat_Counter m_checkpoint_write_time; /* times in milliseconds */ PgStat_Counter m_checkpoint_sync_time; } PgStat_MsgCheckpointer; +typedef struct PgStat_MsgBufferActions +{ + PgStat_MsgHdr m_hdr; + + BackendType backend_type; + uint64 allocs; + uint64 extends; + uint64 fsyncs; + uint64 writes; + uint64 writes_strat; +} PgStat_MsgBufferActions; + /* ---------- * PgStat_MsgWal Sent by backends and background processes to update WAL statistics. * ---------- @@ -712,6 +722,7 @@ typedef union PgStat_Msg PgStat_MsgArchiver msg_archiver; PgStat_MsgBgWriter msg_bgwriter; PgStat_MsgCheckpointer msg_checkpointer; + PgStat_MsgBufferActions msg_buffer_actions; PgStat_MsgWal msg_wal; PgStat_MsgSLRU msg_slru; PgStat_MsgFuncstat msg_funcstat; @@ -860,7 +871,6 @@ typedef struct PgStat_BgWriterStats { PgStat_Counter buf_written_clean; PgStat_Counter maxwritten_clean; - PgStat_Counter buf_alloc; TimestampTz stat_reset_timestamp; } PgStat_BgWriterStats; @@ -875,8 +885,6 @@ typedef struct PgStat_CheckpointerStats PgStat_Counter checkpoint_write_time; /* times in milliseconds */ PgStat_Counter checkpoint_sync_time; PgStat_Counter buf_written_checkpoints; - PgStat_Counter buf_written_backend; - PgStat_Counter buf_fsync_backend; } PgStat_CheckpointerStats; /* @@ -888,6 +896,7 @@ typedef struct PgStat_GlobalStats PgStat_CheckpointerStats checkpointer; PgStat_BgWriterStats bgwriter; + PgStat_MsgBufferActions buffer_actions[BACKEND_NUM_TYPES]; } PgStat_GlobalStats; /* @@ -977,6 +986,7 @@ extern PgStat_MsgBgWriter PendingBgWriterStats; */ extern PgStat_MsgCheckpointer PendingCheckpointerStats; +extern PgStat_MsgBufferActions BufferActionsStats; /* * WAL statistics counter is updated by backends and background processes */ @@ -1128,6 +1138,8 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, extern void pgstat_send_archiver(const char *xlog, bool failed); extern void pgstat_send_bgwriter(void); extern void pgstat_send_checkpointer(void); +extern void pgstat_send_buffer_actions(void); +extern PgStat_MsgBufferActions * pgstat_get_buffer_action_stats(BackendType backend_type); extern void pgstat_send_wal(bool force); /* ---------- diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 33fcaf5c9a..7e385135db 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -310,10 +310,10 @@ extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag * /* freelist.c */ extern BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy, - uint32 *buf_state); + uint32 *buf_state, bool *from_ring); extern void StrategyFreeBuffer(BufferDesc *buf); extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, - BufferDesc *buf); + BufferDesc *buf, bool *from_ring); extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); extern void StrategyNotifyBgWriter(int bgwprocno); diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h index 8042b817df..ecb1076ffd 100644 --- a/src/include/utils/backend_status.h +++ b/src/include/utils/backend_status.h @@ -13,6 +13,7 @@ #include "datatype/timestamp.h" #include "libpq/pqcomm.h" #include "miscadmin.h" /* for BackendType */ +#include "port/atomics.h" #include "utils/backend_progress.h" @@ -79,6 +80,14 @@ typedef struct PgBackendGSSStatus } PgBackendGSSStatus; +typedef struct PgBackendBufferActionStats +{ + pg_atomic_uint64 allocs; + pg_atomic_uint64 extends; + pg_atomic_uint64 fsyncs; + pg_atomic_uint64 writes; + pg_atomic_uint64 writes_strat; +} PgBackendBufferActionStats; /* ---------- * PgBackendStatus @@ -168,6 +177,7 @@ typedef struct PgBackendStatus /* query identifier, optionally computed using post_parse_analyze_hook */ uint64 st_query_id; + PgBackendBufferActionStats buffer_action_stats; } PgBackendStatus; @@ -306,6 +316,10 @@ extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen); extern uint64 pgstat_get_my_query_id(void); +extern void pgstat_increment_buffer_action(BufferActionType ba_type); +extern volatile PgBackendStatus *pgstat_access_backend_status_array(void); +extern bool pgstat_record_dying_backend_buffer_actions(void); + /* ---------- * Support functions for the SQL-callable functions to diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index e5ab11275d..609ccf3b7b 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1824,10 +1824,14 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_clean() AS buffers_clean, pg_stat_get_bgwriter_maxwritten_clean() AS maxwritten_clean, - pg_stat_get_buf_written_backend() AS buffers_backend, - pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync, - pg_stat_get_buf_alloc() AS buffers_alloc, pg_stat_get_bgwriter_stat_reset_time() AS stats_reset; +pg_stat_buffer_actions| SELECT b.backend_type, + b.buffers_alloc, + b.buffers_extend, + b.buffers_fsync, + b.buffers_write, + b.buffers_write_strat + FROM pg_stat_get_buffer_actions() b(backend_type, buffers_alloc, buffers_extend, buffers_fsync, buffers_write, buffers_write_strat); pg_stat_database| SELECT d.oid AS datid, d.datname, CASE diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index feaaee6326..fb4b613d4b 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -176,4 +176,5 @@ FROM prevstats AS pr; DROP TABLE trunc_stats_test, trunc_stats_test1, trunc_stats_test2, trunc_stats_test3, trunc_stats_test4; DROP TABLE prevstats; +SELECT * FROM pg_stat_buffer_actions; -- End of Stats Test -- 2.27.0