Hi, On Wed, Jan 08, 2025 at 03:21:26PM +0900, Michael Paquier wrote: > On Tue, Jan 07, 2025 at 08:48:51AM +0000, Bertrand Drouvot wrote: > > Now that commit 9aea73fc61 added backend-level statistics to pgstats (and > > per backend IO statistics), we can more easily add per backend statistics. > > > > Please find attached a patch to implement $SUBJECT. > > I've looked at v1-0002 and v1-0001.
Thanks for looking at it! > +static void > +pgstat_flush_io_entry(PgStat_EntryRef *entry_ref, bool nowait, bool > need_lock) > { > - PgStatShared_Backend *shbackendioent; > - PgStat_BackendPendingIO *pendingent; > + PgStatShared_Backend *shbackendent; > + PgStat_BackendPending *pendingent; > PgStat_BktypeIO *bktype_shstats; > + PgStat_BackendPendingIO *pending_io; > > - if (!pgstat_lock_entry(entry_ref, nowait)) > - return false; > + if (need_lock && !pgstat_lock_entry(entry_ref, nowait)) > + return; > > The addition of need_lock at this level leads to a result that seems a > bit confusing, where pgstat_backend_flush_cb() passes "false" because > it locks the entry by itself as an effect of v1-0003 with the new area > for WAL. Wouldn't it be cleaner to do an extra pgstat_[un]lock_entry > dance in pgstat_backend_flush_io() instead? Another approach I can > think of that would be slightly cleaner to me is to pass a bits32 to a > single routine that would control if WAL stats, I/O stats or both > should be flushed, keeping pgstat_flush_backend() as name with an > extra argument to decide which parts of the stats should be flushed. Yeah, that's more elegant as it also means that the main callback will not change (should we add even more stats in the future). Done that way in v2 attached. > -PgStat_BackendPendingIO * > +PgStat_BackendPending * > > This rename makes sense. > > #define PG_STAT_GET_WAL_COLS 9 > TupleDesc tupdesc; > Datum values[PG_STAT_GET_WAL_COLS] = {0}; > bool nulls[PG_STAT_GET_WAL_COLS] = {0}; > > It feels unnatural to have a PG_STAT_GET_WAL_COLS while it would not > only relate to this function anymore. Has been renamed to PG_STAT_WAL_COLS in the attached. Regards, -- Bertrand Drouvot PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
>From 190422763353e87f5208e8f6d1aee823eb7860e5 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 07:51:27 +0000 Subject: [PATCH v2 1/3] Extract logic filling pg_stat_get_wal()'s tuple into its own routine This commit adds pg_stat_wal_build_tuple(), a helper routine for pg_stat_get_wal(), that fills its tuple based on the contents of PgStat_WalStats. This will be used in a follow-up commit that uses the same structures as pg_stat_wal for reporting, but for the PGSTAT_KIND_BACKEND statistics kind. --- src/backend/utils/adt/pgstatfuncs.c | 56 ++++++++++++++++++----------- 1 file changed, 36 insertions(+), 20 deletions(-) 100.0% src/backend/utils/adt/ diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 3245f3a8d8..7309f06993 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1560,20 +1560,22 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) } /* - * Returns statistics of WAL activity + * pg_stat_wal_build_tuple + * + * Helper routine for pg_stat_get_wal() returning one tuple based on the contents + * of wal_stats. 
*/ -Datum -pg_stat_get_wal(PG_FUNCTION_ARGS) +static Datum +pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) { -#define PG_STAT_GET_WAL_COLS 9 +#define PG_STAT_WAL_COLS 9 TupleDesc tupdesc; - Datum values[PG_STAT_GET_WAL_COLS] = {0}; - bool nulls[PG_STAT_GET_WAL_COLS] = {0}; + Datum values[PG_STAT_WAL_COLS] = {0}; + bool nulls[PG_STAT_WAL_COLS] = {0}; char buf[256]; - PgStat_WalStats *wal_stats; /* Initialise attributes information in the tuple descriptor */ - tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_WAL_COLS); + tupdesc = CreateTemplateTupleDesc(PG_STAT_WAL_COLS); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "wal_records", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wal_fpi", @@ -1595,34 +1597,48 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) BlessTupleDesc(tupdesc); - /* Get statistics about WAL activity */ - wal_stats = pgstat_fetch_stat_wal(); - /* Fill values and NULLs */ - values[0] = Int64GetDatum(wal_stats->wal_records); - values[1] = Int64GetDatum(wal_stats->wal_fpi); + values[0] = Int64GetDatum(wal_stats.wal_records); + values[1] = Int64GetDatum(wal_stats.wal_fpi); /* Convert to numeric. 
*/ - snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats->wal_bytes); + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats.wal_bytes); values[2] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), Int32GetDatum(-1)); - values[3] = Int64GetDatum(wal_stats->wal_buffers_full); - values[4] = Int64GetDatum(wal_stats->wal_write); - values[5] = Int64GetDatum(wal_stats->wal_sync); + values[3] = Int64GetDatum(wal_stats.wal_buffers_full); + values[4] = Int64GetDatum(wal_stats.wal_write); + values[5] = Int64GetDatum(wal_stats.wal_sync); /* Convert counters from microsec to millisec for display */ - values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 1000.0); - values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 1000.0); + values[6] = Float8GetDatum(((double) wal_stats.wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_stats.wal_sync_time) / 1000.0); - values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); + if (wal_stats.stat_reset_timestamp != 0) + values[8] = TimestampTzGetDatum(wal_stats.stat_reset_timestamp); + else + nulls[8] = true; /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * Returns statistics of WAL activity + */ +Datum +pg_stat_get_wal(PG_FUNCTION_ARGS) +{ + PgStat_WalStats *wal_stats; + + /* Get statistics about WAL activity */ + wal_stats = pgstat_fetch_stat_wal(); + + return (pg_stat_wal_build_tuple(*wal_stats)); +} + /* * Returns statistics of SLRU caches. */ -- 2.34.1
>From 8477a4931e7dc61a42d8c08ccc13d6f12d7e5707 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 08:44:29 +0000 Subject: [PATCH v2 2/3] PGSTAT_KIND_BACKEND code refactoring This commit refactors some code related to per backend statistics. It makes the code more generic or more IO statistics focused as it will be used in a follow-up commit that will introduce per backend WAL statistics. --- src/backend/utils/activity/pgstat.c | 2 +- src/backend/utils/activity/pgstat_backend.c | 88 +++++++++++++------- src/backend/utils/activity/pgstat_io.c | 8 +- src/backend/utils/activity/pgstat_relation.c | 4 +- src/backend/utils/adt/pgstatfuncs.c | 2 +- src/include/pgstat.h | 6 +- src/include/utils/pgstat_internal.h | 5 +- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 76 insertions(+), 40 deletions(-) 85.6% src/backend/utils/activity/ 7.2% src/include/utils/ 4.2% src/include/ diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 16a03b8ce1..34520535d5 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -370,7 +370,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_size = sizeof(PgStatShared_Backend), .shared_data_off = offsetof(PgStatShared_Backend, stats), .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), - .pending_size = sizeof(PgStat_BackendPendingIO), + .pending_size = sizeof(PgStat_BackendPending), .flush_pending_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 1f91bfef0a..0ae507160d 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -24,6 +24,12 @@ #include "utils/pgstat_internal.h" +/* flag bits for different types of statistics to flush */ +#define PGSTAT_FLUSH_IO (1 << 0) 
/* Flush I/O statistics */ +#define PGSTAT_FLUSH_ALL (PGSTAT_FLUSH_IO) + +static void pgstat_flush_io_entry(PgStat_EntryRef *entry_ref); + /* * Returns statistics of a backend by proc number. */ @@ -39,23 +45,49 @@ pgstat_fetch_stat_backend(ProcNumber procNumber) } /* - * Flush out locally pending backend statistics - * - * If no stats have been recorded, this function returns false. + * Main function to flush backend statistics. + * The "stats_to_flush" parameter controls which statistics to flush. */ -bool -pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +static bool +pgstat_flush_backend(PgStat_EntryRef *entry_ref, bool nowait, + bits32 stats_to_flush) { - PgStatShared_Backend *shbackendioent; - PgStat_BackendPendingIO *pendingent; - PgStat_BktypeIO *bktype_shstats; + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return false; + + /* Get our own entry_ref if not provided */ + if (!entry_ref) + entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, + MyProcNumber, false, NULL); if (!pgstat_lock_entry(entry_ref, nowait)) return false; - shbackendioent = (PgStatShared_Backend *) entry_ref->shared_stats; - bktype_shstats = &shbackendioent->stats.stats; - pendingent = (PgStat_BackendPendingIO *) entry_ref->pending; + /* Flush requested statistics */ + if (stats_to_flush & PGSTAT_FLUSH_IO) + pgstat_flush_io_entry(entry_ref); + + pgstat_unlock_entry(entry_ref); + + return true; +} + +/* + * Flush out locally pending backend IO statistics. + * Locking is managed by the caller. 
+ */ +static void +pgstat_flush_io_entry(PgStat_EntryRef *entry_ref) +{ + PgStatShared_Backend *shbackendent; + PgStat_BackendPending *pendingent; + PgStat_BktypeIO *bktype_shstats; + PgStat_BackendPendingIO *pending_io; + + shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; + pendingent = (PgStat_BackendPending *) entry_ref->pending; + bktype_shstats = &shbackendent->stats.io_stats; + pending_io = &pendingent->pending_io; for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) { @@ -66,35 +98,35 @@ pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) instr_time time; bktype_shstats->counts[io_object][io_context][io_op] += - pendingent->counts[io_object][io_context][io_op]; + pending_io->counts[io_object][io_context][io_op]; - time = pendingent->pending_times[io_object][io_context][io_op]; + time = pending_io->pending_times[io_object][io_context][io_op]; bktype_shstats->times[io_object][io_context][io_op] += INSTR_TIME_GET_MICROSEC(time); } } } +} - pgstat_unlock_entry(entry_ref); - - return true; +/* + * Flush out locally pending backend statistics + * + * If no stats have been recorded, this function returns false. + */ +bool +pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +{ + return pgstat_flush_backend(entry_ref, nowait, PGSTAT_FLUSH_ALL); } /* - * Simpler wrapper of pgstat_backend_flush_cb() + * Convenience wrapper to flush I/O statistics. */ void -pgstat_flush_backend(bool nowait) +pgstat_backend_flush_io(bool nowait) { - PgStat_EntryRef *entry_ref; - - if (!pgstat_tracks_backend_bktype(MyBackendType)) - return; - - entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, - MyProcNumber, false, NULL); - (void) pgstat_backend_flush_cb(entry_ref, nowait); + pgstat_flush_backend(NULL, nowait, PGSTAT_FLUSH_IO); } /* @@ -119,9 +151,9 @@ pgstat_create_backend(ProcNumber procnum) } /* - * Find or create a local PgStat_BackendPendingIO entry for proc number. 
+ * Find or create a local PgStat_BackendPending entry for proc number. */ -PgStat_BackendPendingIO * +PgStat_BackendPending * pgstat_prep_backend_pending(ProcNumber procnum) { PgStat_EntryRef *entry_ref; diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index f9a1f91dba..a7445995d3 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -81,10 +81,10 @@ pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint3 if (pgstat_tracks_backend_bktype(MyBackendType)) { - PgStat_PendingIO *entry_ref; + PgStat_BackendPending *entry_ref; entry_ref = pgstat_prep_backend_pending(MyProcNumber); - entry_ref->counts[io_object][io_context][io_op] += cnt; + entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt; } PendingIOStats.counts[io_object][io_context][io_op] += cnt; @@ -151,10 +151,10 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, if (pgstat_tracks_backend_bktype(MyBackendType)) { - PgStat_PendingIO *entry_ref; + PgStat_BackendPending *entry_ref; entry_ref = pgstat_prep_backend_pending(MyProcNumber); - INSTR_TIME_ADD(entry_ref->pending_times[io_object][io_context][io_op], + INSTR_TIME_ADD(entry_ref->pending_io.pending_times[io_object][io_context][io_op], io_time); } } diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 2cc304f881..6092826479 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -264,7 +264,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, * VACUUM command has processed all tables and committed. 
*/ pgstat_flush_io(false); - pgstat_flush_backend(false); + pgstat_backend_flush_io(false); } /* @@ -351,7 +351,7 @@ pgstat_report_analyze(Relation rel, /* see pgstat_report_vacuum() */ pgstat_flush_io(false); - pgstat_flush_backend(false); + pgstat_backend_flush_io(false); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 7309f06993..8a4340e977 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1544,7 +1544,7 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) if (bktype == B_INVALID) return (Datum) 0; - bktype_stats = &backend_stats->stats; + bktype_stats = &backend_stats->io_stats; /* * In Assert builds, we can afford an extra loop through all of the diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 0d8427f27d..6631bd2d73 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -381,7 +381,7 @@ typedef PgStat_PendingIO PgStat_BackendPendingIO; typedef struct PgStat_Backend { TimestampTz stat_reset_timestamp; - PgStat_BktypeIO stats; + PgStat_BktypeIO io_stats; } PgStat_Backend; typedef struct PgStat_StatDBEntry @@ -523,6 +523,10 @@ typedef struct PgStat_PendingWalStats instr_time wal_sync_time; } PgStat_PendingWalStats; +typedef struct PgStat_BackendPending +{ + PgStat_BackendPendingIO pending_io; +} PgStat_BackendPending; /* * Functions in pgstat.c diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 52eb008710..320cd5a842 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -613,9 +613,8 @@ extern void pgstat_archiver_snapshot_cb(void); * Functions in pgstat_backend.c */ -extern void pgstat_flush_backend(bool nowait); - -extern PgStat_BackendPendingIO *pgstat_prep_backend_pending(ProcNumber procnum); +extern void pgstat_backend_flush_io(bool nowait); +extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); extern bool 
pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9f83ecf181..f15526236a 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2140,6 +2140,7 @@ PgStatShared_Subscription PgStatShared_Wal PgStat_ArchiverStats PgStat_Backend +PgStat_BackendPending PgStat_BackendPendingIO PgStat_BackendSubEntry PgStat_BgWriterStats -- 2.34.1
>From 158dd7d6da4ee39a1743c44ca6385aba33a826e2 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 10:00:00 +0000 Subject: [PATCH v2 3/3] per backend WAL statistics Now that commit 9aea73fc61 added backend-level statistics to pgstats (and per backend IO statistics) we can more easily add per backend statistics. This commit adds per backend WAL statistics using the same layer as pg_stat_wal, except that it is now possible to know how much WAL activity is happening in each backend rather than an overall aggregate of all the activity. A function called pg_stat_get_backend_wal() is added to access this data depending on the PID of a backend. The same limitation as in 9aea73fc61 persists, meaning that Auxiliary processes are not included in this set of statistics. XXX: bump catalog version --- doc/src/sgml/config.sgml | 4 +- doc/src/sgml/monitoring.sgml | 19 ++++ src/backend/access/transam/xlog.c | 36 +++++++- src/backend/utils/activity/pgstat_backend.c | 99 ++++++++++++++++++++- src/backend/utils/activity/pgstat_wal.c | 6 +- src/backend/utils/adt/pgstatfuncs.c | 78 +++++++++++++--- src/include/catalog/pg_proc.dat | 7 ++ src/include/pgstat.h | 37 ++++++-- src/include/utils/pgstat_internal.h | 1 + src/test/regress/expected/stats.out | 14 +++ src/test/regress/sql/stats.sql | 6 ++ src/tools/pgindent/typedefs.list | 2 + 12 files changed, 279 insertions(+), 30 deletions(-) 9.9% doc/src/sgml/ 8.6% src/backend/access/transam/ 34.4% src/backend/utils/activity/ 27.8% src/backend/utils/adt/ 4.9% src/include/catalog/ 6.6% src/include/ 3.9% src/test/regress/expected/ 3.0% src/test/regress/sql/ diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8683f0bdf5..8e8478dcb1 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8433,7 +8433,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; measure the overhead of timing on your system. 
I/O timing information is displayed in <link linkend="monitoring-pg-stat-wal-view"> - <structname>pg_stat_wal</structname></link>. + <structname>pg_stat_wal</structname></link> and in the output of the + <link linkend="pg-stat-get-backend-wal"> + <function>pg_stat_get_backend_wal()</function></link> function. Only superusers and users with the appropriate <literal>SET</literal> privilege can change this setting. </para> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index d0d176cc54..84a2d09b76 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -4811,6 +4811,25 @@ description | Waiting for a newly initialized WAL file to reach durable storage </para></entry> </row> + <row> + <entry id="pg-stat-get-backend-wal" role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_stat_get_backend_wal</primary> + </indexterm> + <function>pg_stat_get_backend_wal</function> ( <type>integer</type> ) + <returnvalue>record</returnvalue> + </para> + <para> + Returns WAL statistics about the backend with the specified + process ID. The output fields are exactly the same as the ones in the + <structname>pg_stat_wal</structname> view. + </para> + <para> + The function does not return WAL statistics for the checkpointer, + the background writer, the startup process and the autovacuum launcher. 
+ </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index bf3dbda901..0ba9fcb277 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2058,6 +2058,10 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) XLogWrite(WriteRqst, tli, false); LWLockRelease(WALWriteLock); PendingWalStats.wal_buffers_full++; + + if (pgstat_tracks_backend_bktype(MyBackendType)) + PendingBackendWalStats.wal_buffers_full++; + TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE(); } /* Re-acquire WALBufMappingLock and retry */ @@ -2426,11 +2430,14 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) Size nleft; ssize_t written; instr_time start; + instr_time end; /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; nbytes = npages * (Size) XLOG_BLCKSZ; nleft = nbytes; + /* keep compiler quiet */ + INSTR_TIME_SET_ZERO(end); do { errno = 0; @@ -2451,14 +2458,26 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) */ if (track_wal_io_timing) { - instr_time end; - INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start); } PendingWalStats.wal_write++; + if (pgstat_tracks_backend_bktype(MyBackendType)) + { + /* + * We are inside a critical section, so we can't use + * pgstat_prep_pending_entry() and we rely on + * PendingBackendWalStats instead. 
+ */ + PendingBackendWalStats.wal_write++; + + if (track_wal_io_timing) + INSTR_TIME_ACCUM_DIFF(PendingBackendWalStats.wal_write_time, + end, start); + } + if (written <= 0) { char xlogfname[MAXFNAMELEN]; @@ -8684,8 +8703,11 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) { char *msg = NULL; instr_time start; + instr_time end; Assert(tli != 0); + /* keep compiler quiet */ + INSTR_TIME_SET_ZERO(end); /* * Quick exit if fsync is disabled or write() has already synced the WAL @@ -8751,13 +8773,19 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) */ if (track_wal_io_timing) { - instr_time end; - INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start); } PendingWalStats.wal_sync++; + + if (pgstat_tracks_backend_bktype(MyBackendType)) + { + PendingBackendWalStats.wal_sync++; + + if (track_wal_io_timing) + INSTR_TIME_ACCUM_DIFF(PendingBackendWalStats.wal_sync_time, end, start); + } } /* diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 0ae507160d..58c41edea4 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -26,9 +26,21 @@ /* flag bits for different types of statistics to flush */ #define PGSTAT_FLUSH_IO (1 << 0) /* Flush I/O statistics */ -#define PGSTAT_FLUSH_ALL (PGSTAT_FLUSH_IO) +#define PGSTAT_FLUSH_WAL (1 << 1) /* Flush WAL statistics */ +#define PGSTAT_FLUSH_ALL (PGSTAT_FLUSH_IO | PGSTAT_FLUSH_WAL) + +PgStat_PendingWalStats PendingBackendWalStats = {0}; + +/* + * WAL usage counters saved from pgWalUsage at the previous call to + * pgstat_report_wal(). This is used to calculate how much WAL usage + * happens between pgstat_report_wal() calls, by subtracting + * the previous counters from the current ones. 
+ */ +static WalUsage prevBackendWalUsage; static void pgstat_flush_io_entry(PgStat_EntryRef *entry_ref); +static void pgstat_flush_wal_entry(PgStat_EntryRef *entry_ref); /* * Returns statistics of a backend by proc number. @@ -67,6 +79,9 @@ pgstat_flush_backend(PgStat_EntryRef *entry_ref, bool nowait, if (stats_to_flush & PGSTAT_FLUSH_IO) pgstat_flush_io_entry(entry_ref); + if (stats_to_flush & PGSTAT_FLUSH_WAL) + pgstat_flush_wal_entry(entry_ref); + pgstat_unlock_entry(entry_ref); return true; @@ -109,6 +124,75 @@ pgstat_flush_io_entry(PgStat_EntryRef *entry_ref) } } +/* + * To determine whether any WAL activity has occurred since last time, not + * only the number of generated WAL records but also the numbers of WAL + * writes and syncs need to be checked, because even a transaction that + * generates no WAL records can write or sync WAL data when flushing the + * data pages. + */ +static bool +pgstat_backend_wal_have_pending(void) +{ + return pgWalUsage.wal_records != prevBackendWalUsage.wal_records || + PendingBackendWalStats.wal_write != 0 || + PendingBackendWalStats.wal_sync != 0; +} + +/* + * Flush out locally pending backend WAL statistics. + * Locking is managed by the caller. + */ +static void +pgstat_flush_wal_entry(PgStat_EntryRef *entry_ref) +{ + PgStatShared_Backend *shbackendent; + PgStat_WalCounters *bktype_shstats; + WalUsage wal_usage_diff = {0}; + + /* + * This function can be called even if nothing at all has happened. The + * caller already holds the entry lock, so just skip the work in that case. + */ + if (!pgstat_backend_wal_have_pending()) + return; + + shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; + bktype_shstats = &shbackendent->stats.wal_stats; + + /* + * We don't update the WAL usage portion of the local WalStats elsewhere. + * Calculate how much WAL usage counters were increased by subtracting the + * previous counters from the current ones. 
+ */ + WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevBackendWalUsage); + +#define WALSTAT_ACC(fld, var_to_add) \ + (bktype_shstats->fld += var_to_add.fld) +#define WALSTAT_ACC_INSTR_TIME(fld) \ + (bktype_shstats->fld += INSTR_TIME_GET_MICROSEC(PendingBackendWalStats.fld)) + WALSTAT_ACC(wal_buffers_full, PendingBackendWalStats); + WALSTAT_ACC(wal_write, PendingBackendWalStats); + WALSTAT_ACC(wal_sync, PendingBackendWalStats); + WALSTAT_ACC(wal_records, wal_usage_diff); + WALSTAT_ACC(wal_fpi, wal_usage_diff); + WALSTAT_ACC(wal_bytes, wal_usage_diff); + WALSTAT_ACC_INSTR_TIME(wal_write_time); + WALSTAT_ACC_INSTR_TIME(wal_sync_time); +#undef WALSTAT_ACC_INSTR_TIME +#undef WALSTAT_ACC + + /* + * Save the current counters for the subsequent calculation of WAL usage. + */ + prevBackendWalUsage = pgWalUsage; + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingBackendWalStats, 0, sizeof(PendingWalStats)); +} + /* * Flush out locally pending backend statistics * @@ -129,6 +213,15 @@ pgstat_backend_flush_io(bool nowait) pgstat_flush_backend(NULL, nowait, PGSTAT_FLUSH_IO); } +/* + * Convenience wrapper to flush WAL statistics. + */ +void +pgstat_backend_flush_wal(bool nowait) +{ + pgstat_flush_backend(NULL, nowait, PGSTAT_FLUSH_WAL); +} + /* * Create backend statistics entry for proc number. */ @@ -148,10 +241,12 @@ pgstat_create_backend(ProcNumber procnum) * e.g. if we previously used this proc number. */ memset(&shstatent->stats, 0, sizeof(shstatent->stats)); + + prevBackendWalUsage = pgWalUsage; } /* - * Find or create a local PgStat_BackendPending entry for proc number. + * Find or create a local PgStat_BackendPendingIO entry for proc number. 
*/ PgStat_BackendPending * pgstat_prep_backend_pending(ProcNumber procnum) diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index 18fa6b2936..160ce99a4c 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -55,6 +55,8 @@ pgstat_report_wal(bool force) /* flush wal stats */ pgstat_flush_wal(nowait); + pgstat_backend_flush_wal(nowait); + /* flush IO stats */ pgstat_flush_io(nowait); } @@ -117,9 +119,9 @@ pgstat_wal_flush_cb(bool nowait) return true; #define WALSTAT_ACC(fld, var_to_add) \ - (stats_shmem->stats.fld += var_to_add.fld) + (stats_shmem->stats.wal_counters.fld += var_to_add.fld) #define WALSTAT_ACC_INSTR_TIME(fld) \ - (stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld)) + (stats_shmem->stats.wal_counters.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld)) WALSTAT_ACC(wal_records, wal_usage_diff); WALSTAT_ACC(wal_fpi, wal_usage_diff); WALSTAT_ACC(wal_bytes, wal_usage_diff); diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 8a4340e977..8d251dfcca 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1562,11 +1562,12 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) /* * pg_stat_wal_build_tuple * - * Helper routine for pg_stat_get_wal() returning one tuple based on the contents - * of wal_stats. + * Helper routine for pg_stat_get_wal() and pg_stat_get_backend_wal() returning + * one tuple based on the contents of wal_counters. 
*/ static Datum -pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) +pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters, + TimestampTz stat_reset_timestamp) { #define PG_STAT_WAL_COLS 9 TupleDesc tupdesc; @@ -1598,26 +1599,26 @@ pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) BlessTupleDesc(tupdesc); /* Fill values and NULLs */ - values[0] = Int64GetDatum(wal_stats.wal_records); - values[1] = Int64GetDatum(wal_stats.wal_fpi); + values[0] = Int64GetDatum(wal_counters.wal_records); + values[1] = Int64GetDatum(wal_counters.wal_fpi); /* Convert to numeric. */ - snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats.wal_bytes); + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_counters.wal_bytes); values[2] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), Int32GetDatum(-1)); - values[3] = Int64GetDatum(wal_stats.wal_buffers_full); - values[4] = Int64GetDatum(wal_stats.wal_write); - values[5] = Int64GetDatum(wal_stats.wal_sync); + values[3] = Int64GetDatum(wal_counters.wal_buffers_full); + values[4] = Int64GetDatum(wal_counters.wal_write); + values[5] = Int64GetDatum(wal_counters.wal_sync); /* Convert counters from microsec to millisec for display */ - values[6] = Float8GetDatum(((double) wal_stats.wal_write_time) / 1000.0); - values[7] = Float8GetDatum(((double) wal_stats.wal_sync_time) / 1000.0); + values[6] = Float8GetDatum(((double) wal_counters.wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_counters.wal_sync_time) / 1000.0); - if (wal_stats.stat_reset_timestamp != 0) - values[8] = TimestampTzGetDatum(wal_stats.stat_reset_timestamp); + if (stat_reset_timestamp != 0) + values[8] = TimestampTzGetDatum(stat_reset_timestamp); else nulls[8] = true; @@ -1625,6 +1626,55 @@ pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * Returns WAL statistics for a backend with given PID. 
+ */ +Datum +pg_stat_get_backend_wal(PG_FUNCTION_ARGS) +{ + int pid; + PGPROC *proc; + ProcNumber procNumber; + PgStat_Backend *backend_stats; + PgStat_WalCounters bktype_stats; + PgBackendStatus *beentry; + + pid = PG_GETARG_INT32(0); + proc = BackendPidGetProc(pid); + + /* + * This could be an auxiliary process but these do not report backend + * statistics due to pgstat_tracks_backend_bktype(), so there is no need + * for an extra call to AuxiliaryPidGetProc(). + */ + if (!proc) + PG_RETURN_NULL(); + + procNumber = GetNumberFromPGProc(proc); + + beentry = pgstat_get_beentry_by_proc_number(procNumber); + if (!beentry) + PG_RETURN_NULL(); + + backend_stats = pgstat_fetch_stat_backend(procNumber); + if (!backend_stats) + PG_RETURN_NULL(); + + /* if PID does not match, leave */ + if (beentry->st_procpid != pid) + PG_RETURN_NULL(); + + /* backend may be gone, so recheck in case */ + if (beentry->st_backendType == B_INVALID) + PG_RETURN_NULL(); + + bktype_stats = backend_stats->wal_stats; + + /* build the tuple from this backend's PgStat_WalCounters */ + return (pg_stat_wal_build_tuple(bktype_stats, backend_stats->stat_reset_timestamp)); +} + + /* * Returns statistics of WAL activity */ @@ -1636,7 +1686,7 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) /* Get statistics about WAL activity */ wal_stats = pgstat_fetch_stat_wal(); - return (pg_stat_wal_build_tuple(*wal_stats)); + return (pg_stat_wal_build_tuple(wal_stats->wal_counters, wal_stats->stat_reset_timestamp)); } /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b37e8a6f88..72a5dae4b1 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5929,6 +5929,13 @@ proargmodes => '{o,o,o,o,o,o,o,o,o}', proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}', prosrc => 'pg_stat_get_wal' }, +{ oid => '8037', descr => 'statistics: backend WAL activity', + proname => 'pg_stat_get_backend_wal', 
provolatile => 'v', + proparallel => 'r', prorettype => 'record', proargtypes => 'int4', + proallargtypes => '{int4,int8,int8,numeric,int8,int8,int8,float8,float8,timestamptz}', + proargmodes => '{i,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_pid,wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}', + prosrc => 'pg_stat_get_backend_wal' }, { oid => '6248', descr => 'statistics: information about WAL prefetching', proname => 'pg_stat_get_recovery_prefetch', prorows => '1', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => '', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 6631bd2d73..045877c5a8 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -378,12 +378,6 @@ typedef struct PgStat_IO /* Backend statistics store the same amount of IO data as PGSTAT_KIND_IO */ typedef PgStat_PendingIO PgStat_BackendPendingIO; -typedef struct PgStat_Backend -{ - TimestampTz stat_reset_timestamp; - PgStat_BktypeIO io_stats; -} PgStat_Backend; - typedef struct PgStat_StatDBEntry { PgStat_Counter xact_commit; @@ -495,7 +489,7 @@ typedef struct PgStat_StatTabEntry PgStat_Counter autoanalyze_count; } PgStat_StatTabEntry; -typedef struct PgStat_WalStats +typedef struct PgStat_WalCounters { PgStat_Counter wal_records; PgStat_Counter wal_fpi; @@ -505,6 +499,11 @@ typedef struct PgStat_WalStats PgStat_Counter wal_sync; PgStat_Counter wal_write_time; PgStat_Counter wal_sync_time; +} PgStat_WalCounters; + +typedef struct PgStat_WalStats +{ + PgStat_WalCounters wal_counters; TimestampTz stat_reset_timestamp; } PgStat_WalStats; @@ -523,9 +522,21 @@ typedef struct PgStat_PendingWalStats instr_time wal_sync_time; } PgStat_PendingWalStats; +typedef struct PgStat_Backend +{ + TimestampTz stat_reset_timestamp; + PgStat_BktypeIO io_stats; + PgStat_WalCounters wal_stats; +} PgStat_Backend; + typedef struct PgStat_BackendPending { PgStat_BackendPendingIO pending_io; + + /* + * We are not 
creating one member for PgStat_PendingWalStats. See the + * comment above the PendingBackendWalStats definition as to why. + */ } PgStat_BackendPending; /* @@ -857,5 +868,17 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause; /* updated directly by backends and background processes */ extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats; +/* + * Variables in pgstat_backend.c + */ + +/* updated directly by backends and background processes */ + +/* + * WAL pending statistics are incremented inside a critical section + * (see XLogWrite()), so we can't use pgstat_prep_pending_entry() and we rely on + * PendingBackendWalStats instead. + */ +extern PGDLLIMPORT PgStat_PendingWalStats PendingBackendWalStats; #endif /* PGSTAT_H */ diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 320cd5a842..06681b9102 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -614,6 +614,7 @@ extern void pgstat_archiver_snapshot_cb(void); */ extern void pgstat_backend_flush_io(bool nowait); +extern void pgstat_backend_flush_wal(bool nowait); extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); extern bool pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index a0317b7208..cc01fdf274 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -832,6 +832,8 @@ SELECT sessions > :db_stat_sessions FROM pg_stat_database WHERE datname = (SELEC SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset -- Test pg_stat_wal (and make a temp table so our temp schema exists) SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset +-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists) +SELECT wal_bytes AS 
backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset CREATE TEMP TABLE test_stats_temp AS SELECT 17; DROP TABLE test_stats_temp; -- Checkpoint twice: The checkpointer reports stats after reporting completion @@ -851,6 +853,18 @@ SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal; t (1 row) +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid()); + ?column? +---------- + t +(1 row) + -- Test pg_stat_get_backend_idset() and some allied functions. -- In particular, verify that their notion of backend ID matches -- our temp schema index. diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 399c72bbcf..28fe0a1a7d 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -423,6 +423,9 @@ SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset -- Test pg_stat_wal (and make a temp table so our temp schema exists) SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset +-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists) +SELECT wal_bytes AS backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset + CREATE TEMP TABLE test_stats_temp AS SELECT 17; DROP TABLE test_stats_temp; @@ -435,6 +438,9 @@ CHECKPOINT; SELECT num_requested > :rqst_ckpts_before FROM pg_stat_checkpointer; SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal; +SELECT pg_stat_force_next_flush(); +SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid()); + -- Test pg_stat_get_backend_idset() and some allied functions. -- In particular, verify that their notion of backend ID matches -- our temp schema index. 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index f15526236a..b593d8601e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2159,6 +2159,7 @@ PgStat_KindInfo PgStat_LocalState PgStat_PendingDroppedStatsItem PgStat_PendingIO +PgStat_PendingBackendWalStats PgStat_PendingWalStats PgStat_SLRUStats PgStat_ShmemControl @@ -2175,6 +2176,7 @@ PgStat_SubXactStatus PgStat_TableCounts PgStat_TableStatus PgStat_TableXactStatus +PgStat_WalCounters PgStat_WalStats PgXmlErrorContext PgXmlStrictness -- 2.34.1