Hi hackers, Now that commit 9aea73fc61 added backend-level statistics to pgstats (and per backend IO statistics), we can more easily add per backend statistics.
Please find attached a patch to implement $SUBJECT. It's using the same layer as pg_stat_wal, except that it is now possible to know how much WAL activity is happening in each backend rather than an overall aggregate of all the activity. A function called pg_stat_get_backend_wal() is added to access this data depending on the PID of a backend. === Outcome === With this in place, one could for example: 1. Get the WAL statistics for a backend pid: postgres=# select * from pg_stat_get_backend_wal(473278); -[ RECORD 1 ]----+--------- wal_records | 300008 wal_fpi | 7 wal_bytes | 17753097 wal_buffers_full | 933 wal_write | 937 wal_sync | 2 wal_write_time | 0 wal_sync_time | 0 stats_reset | 2. Get the wal_bytes generated by application: postgres=# SELECT application_name, wal_bytes, round(100 * wal_bytes/sum(wal_bytes) over(),2) AS "%" FROM (SELECT application_name, sum(wal_bytes) AS wal_bytes FROM pg_stat_activity, pg_stat_get_backend_wal(pid) WHERE wal_bytes != 0 GROUP BY application_name); application_name | wal_bytes | % ------------------+-----------+------- app1 | 17708761 | 39.95 app2 | 26614797 | 60.05 (2 rows) 3. Get the wal_bytes generated by database: postgres=# SELECT datname, wal_bytes, round(100 * wal_bytes/sum(wal_bytes) over(),2) AS "%" FROM (SELECT datname, sum(wal_bytes) AS wal_bytes FROM pg_stat_activity, pg_stat_get_backend_wal(pid) WHERE wal_bytes != 0 GROUP BY datname); datname | wal_bytes | % ---------+-----------+------- db1 | 35461858 | 80.01 db2 | 8861700 | 19.99 (2 rows) and much more... === Implementation === The same limitation as in 9aea73fc61 persists, meaning that Auxiliary processes are not included in this set of statistics. The patch is made of 3 sub-patches: 0001: to extract the logic filling pg_stat_get_wal()'s tuple into its own routine. It adds pg_stat_wal_build_tuple(), a helper routine for pg_stat_get_wal(), that fills its tuple based on the contents of PgStat_WalStats. Same idea as ff7c40d7fd. 
0002: PGSTAT_KIND_BACKEND code refactoring. It refactors some code related to per backend statistics. It makes the code more generic or more IO statistics focused as it will be used in 0003 that will introduce per backend WAL statistics. It does not add any new feature, that's 100% code refactoring to ease 0003 review. 0003: it adds the per backend WAL statistics and the new pg_stat_get_backend_wal() function, documentation and related test. === Remarks === R1: 0003 does not rely on pgstat_prep_backend_pending() for its pending statistics but on a new PendingBackendWalStats variable. The reason is that the pending wal statistics are incremented in a critical section (see XLogWrite()), and so a call to pgstat_prep_pending_entry() could trigger a failed assertion: MemoryContextAllocZero()->"CritSectionCount == 0 || (context)->allowInCritSection" R2: Instead of relying on a new PendingBackendWalStats, we could rely on the existing PendingWalStats variable. But that would complicate the flush of per backend and existing wal stats as that would need some coordination. I think that it's better that each kind has its own pending variable. R3: Instead of incrementing the PendingBackendWalStats members individually we could also "just" assign the PendingWalStats ones once incremented. I thought it's better to make them "fully independent" though. R4: 0002 introduces a new PgStat_BackendPending struct. Due to R1, that's not needed per se but would have been had pgstat_prep_backend_pending() been used. I keep this change as we may want to add more per backend stats in the future. Looking forward to your feedback, Regards, -- Bertrand Drouvot PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
>From 5c2241fcee52f7fb3cfea9890d495f51b981a9c0 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 07:51:27 +0000 Subject: [PATCH v1 1/3] Extract logic filling pg_stat_get_wal()'s tuple into its own routine This commit adds pg_stat_wal_build_tuple(), a helper routine for pg_stat_get_wal(), that fills its tuple based on the contents of PgStat_WalStats. This will be used in a follow-up commit that uses the same structures as pg_stat_wal for reporting, but for the PGSTAT_KIND_BACKEND statistics kind. --- src/backend/utils/adt/pgstatfuncs.c | 48 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 16 deletions(-) 100.0% src/backend/utils/adt/ diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 3245f3a8d8..cccfbf0706 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1560,17 +1560,19 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) } /* - * Returns statistics of WAL activity + * pg_stat_wal_build_tuple + * + * Helper routine for pg_stat_get_wal() returning one tuple based on the contents + * of wal_stats. */ -Datum -pg_stat_get_wal(PG_FUNCTION_ARGS) +static Datum +pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) { #define PG_STAT_GET_WAL_COLS 9 TupleDesc tupdesc; Datum values[PG_STAT_GET_WAL_COLS] = {0}; bool nulls[PG_STAT_GET_WAL_COLS] = {0}; char buf[256]; - PgStat_WalStats *wal_stats; /* Initialise attributes information in the tuple descriptor */ tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_WAL_COLS); @@ -1595,34 +1597,48 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) BlessTupleDesc(tupdesc); - /* Get statistics about WAL activity */ - wal_stats = pgstat_fetch_stat_wal(); - /* Fill values and NULLs */ - values[0] = Int64GetDatum(wal_stats->wal_records); - values[1] = Int64GetDatum(wal_stats->wal_fpi); + values[0] = Int64GetDatum(wal_stats.wal_records); + values[1] = Int64GetDatum(wal_stats.wal_fpi); /* Convert to numeric. 
*/ - snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats->wal_bytes); + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats.wal_bytes); values[2] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), Int32GetDatum(-1)); - values[3] = Int64GetDatum(wal_stats->wal_buffers_full); - values[4] = Int64GetDatum(wal_stats->wal_write); - values[5] = Int64GetDatum(wal_stats->wal_sync); + values[3] = Int64GetDatum(wal_stats.wal_buffers_full); + values[4] = Int64GetDatum(wal_stats.wal_write); + values[5] = Int64GetDatum(wal_stats.wal_sync); /* Convert counters from microsec to millisec for display */ - values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 1000.0); - values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 1000.0); + values[6] = Float8GetDatum(((double) wal_stats.wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_stats.wal_sync_time) / 1000.0); - values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); + if (wal_stats.stat_reset_timestamp != 0) + values[8] = TimestampTzGetDatum(wal_stats.stat_reset_timestamp); + else + nulls[8] = true; /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * Returns statistics of WAL activity + */ +Datum +pg_stat_get_wal(PG_FUNCTION_ARGS) +{ + PgStat_WalStats *wal_stats; + + /* Get statistics about WAL activity */ + wal_stats = pgstat_fetch_stat_wal(); + + return (pg_stat_wal_build_tuple(*wal_stats)); +} + /* * Returns statistics of SLRU caches. */ -- 2.34.1
>From 29d2d565ba106028907524134b267998d36e9762 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 08:44:29 +0000 Subject: [PATCH v1 2/3] PGSTAT_KIND_BACKEND code refactoring This commit refactors some come related to per backend statistics. It makes the code more generic or more IO statistics focused as it will be used in a follow-up commit that will introduce per backend WAL statistics. --- src/backend/utils/activity/pgstat.c | 2 +- src/backend/utils/activity/pgstat_backend.c | 60 +++++++++++++------- src/backend/utils/activity/pgstat_io.c | 8 +-- src/backend/utils/activity/pgstat_relation.c | 4 +- src/backend/utils/adt/pgstatfuncs.c | 2 +- src/include/pgstat.h | 6 +- src/include/utils/pgstat_internal.h | 5 +- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 57 insertions(+), 31 deletions(-) 81.9% src/backend/utils/activity/ 9.0% src/include/utils/ 5.3% src/include/ 3.6% src/ diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 16a03b8ce1..34520535d5 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -370,7 +370,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_size = sizeof(PgStatShared_Backend), .shared_data_off = offsetof(PgStatShared_Backend, stats), .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), - .pending_size = sizeof(PgStat_BackendPendingIO), + .pending_size = sizeof(PgStat_BackendPending), .flush_pending_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 1f91bfef0a..2bdaa07828 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -39,23 +39,23 @@ pgstat_fetch_stat_backend(ProcNumber procNumber) } /* - * Flush out locally pending backend statistics - * - * If no 
stats have been recorded, this function returns false. + * Flush out locally pending backend IO statistics. */ -bool -pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +static void +pgstat_flush_io_entry(PgStat_EntryRef *entry_ref, bool nowait, bool need_lock) { - PgStatShared_Backend *shbackendioent; - PgStat_BackendPendingIO *pendingent; + PgStatShared_Backend *shbackendent; + PgStat_BackendPending *pendingent; PgStat_BktypeIO *bktype_shstats; + PgStat_BackendPendingIO *pending_io; - if (!pgstat_lock_entry(entry_ref, nowait)) - return false; + if (need_lock && !pgstat_lock_entry(entry_ref, nowait)) + return; - shbackendioent = (PgStatShared_Backend *) entry_ref->shared_stats; - bktype_shstats = &shbackendioent->stats.stats; - pendingent = (PgStat_BackendPendingIO *) entry_ref->pending; + shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; + pendingent = (PgStat_BackendPending *) entry_ref->pending; + bktype_shstats = &shbackendent->stats.io_stats; + pending_io = &pendingent->pending_io; for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) { @@ -66,9 +66,9 @@ pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) instr_time time; bktype_shstats->counts[io_object][io_context][io_op] += - pendingent->counts[io_object][io_context][io_op]; + pending_io->counts[io_object][io_context][io_op]; - time = pendingent->pending_times[io_object][io_context][io_op]; + time = pending_io->pending_times[io_object][io_context][io_op]; bktype_shstats->times[io_object][io_context][io_op] += INSTR_TIME_GET_MICROSEC(time); @@ -76,16 +76,37 @@ pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) } } + if (need_lock) + pgstat_unlock_entry(entry_ref); +} + +/* + * Flush out locally pending backend statistics + * + * If no stats have been recorded, this function returns false. 
+ */ +bool +pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +{ + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return false; + + if (!pgstat_lock_entry(entry_ref, nowait)) + return false; + + /* IO stats */ + pgstat_flush_io_entry(entry_ref, nowait, false); + pgstat_unlock_entry(entry_ref); return true; } /* - * Simpler wrapper of pgstat_backend_flush_cb() + * Simpler wrapper of pgstat_flush_io_entry() */ void -pgstat_flush_backend(bool nowait) +pgstat_backend_flush_io(bool nowait) { PgStat_EntryRef *entry_ref; @@ -94,7 +115,8 @@ pgstat_flush_backend(bool nowait) entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber, false, NULL); - (void) pgstat_backend_flush_cb(entry_ref, nowait); + + pgstat_flush_io_entry(entry_ref, nowait, true); } /* @@ -119,9 +141,9 @@ pgstat_create_backend(ProcNumber procnum) } /* - * Find or create a local PgStat_BackendPendingIO entry for proc number. + * Find or create a local PgStat_BackendPending entry for proc number. 
*/ -PgStat_BackendPendingIO * +PgStat_BackendPending * pgstat_prep_backend_pending(ProcNumber procnum) { PgStat_EntryRef *entry_ref; diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index f9a1f91dba..a7445995d3 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -81,10 +81,10 @@ pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint3 if (pgstat_tracks_backend_bktype(MyBackendType)) { - PgStat_PendingIO *entry_ref; + PgStat_BackendPending *entry_ref; entry_ref = pgstat_prep_backend_pending(MyProcNumber); - entry_ref->counts[io_object][io_context][io_op] += cnt; + entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt; } PendingIOStats.counts[io_object][io_context][io_op] += cnt; @@ -151,10 +151,10 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, if (pgstat_tracks_backend_bktype(MyBackendType)) { - PgStat_PendingIO *entry_ref; + PgStat_BackendPending *entry_ref; entry_ref = pgstat_prep_backend_pending(MyProcNumber); - INSTR_TIME_ADD(entry_ref->pending_times[io_object][io_context][io_op], + INSTR_TIME_ADD(entry_ref->pending_io.pending_times[io_object][io_context][io_op], io_time); } } diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 2cc304f881..6092826479 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -264,7 +264,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, * VACUUM command has processed all tables and committed. 
*/ pgstat_flush_io(false); - pgstat_flush_backend(false); + pgstat_backend_flush_io(false); } /* @@ -351,7 +351,7 @@ pgstat_report_analyze(Relation rel, /* see pgstat_report_vacuum() */ pgstat_flush_io(false); - pgstat_flush_backend(false); + pgstat_backend_flush_io(false); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index cccfbf0706..cd3baed472 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1544,7 +1544,7 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) if (bktype == B_INVALID) return (Datum) 0; - bktype_stats = &backend_stats->stats; + bktype_stats = &backend_stats->io_stats; /* * In Assert builds, we can afford an extra loop through all of the diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 0d8427f27d..6631bd2d73 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -381,7 +381,7 @@ typedef PgStat_PendingIO PgStat_BackendPendingIO; typedef struct PgStat_Backend { TimestampTz stat_reset_timestamp; - PgStat_BktypeIO stats; + PgStat_BktypeIO io_stats; } PgStat_Backend; typedef struct PgStat_StatDBEntry @@ -523,6 +523,10 @@ typedef struct PgStat_PendingWalStats instr_time wal_sync_time; } PgStat_PendingWalStats; +typedef struct PgStat_BackendPending +{ + PgStat_BackendPendingIO pending_io; +} PgStat_BackendPending; /* * Functions in pgstat.c diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 52eb008710..320cd5a842 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -613,9 +613,8 @@ extern void pgstat_archiver_snapshot_cb(void); * Functions in pgstat_backend.c */ -extern void pgstat_flush_backend(bool nowait); - -extern PgStat_BackendPendingIO *pgstat_prep_backend_pending(ProcNumber procnum); +extern void pgstat_backend_flush_io(bool nowait); +extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); extern bool 
pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e1c4f913f8..1c8516fd63 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2139,6 +2139,7 @@ PgStatShared_Subscription PgStatShared_Wal PgStat_ArchiverStats PgStat_Backend +PgStat_BackendPending PgStat_BackendPendingIO PgStat_BackendSubEntry PgStat_BgWriterStats -- 2.34.1
>From f42d48b841507d5510e0964fb12690d638fa62f4 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 10:00:00 +0000 Subject: [PATCH v1 3/3] per backend WAL statistics Now that commit 9aea73fc61 added backend-level statistics to pgstats (and per backend IO statistics) we can more easily add per backend statistics. This commit adds per backend WAL statistics using the same layer as pg_stat_wal, except that it is now possible to know how much WAL activity is happening in each backend rather than an overall aggregate of all the activity. A function called pg_stat_get_backend_wal() is added to access this data depending on the PID of a backend. The same limitation as in 9aea73fc61 persists, meaning that Auxiliary processes are not included in this set of statistics. XXX: bump catalog version --- doc/src/sgml/config.sgml | 4 +- doc/src/sgml/monitoring.sgml | 19 ++++ src/backend/access/transam/xlog.c | 36 ++++++- src/backend/utils/activity/pgstat_backend.c | 108 +++++++++++++++++++- src/backend/utils/activity/pgstat_wal.c | 6 +- src/backend/utils/adt/pgstatfuncs.c | 78 +++++++++++--- src/include/catalog/pg_proc.dat | 7 ++ src/include/pgstat.h | 37 +++++-- src/include/utils/pgstat_internal.h | 1 + src/test/regress/expected/stats.out | 14 +++ src/test/regress/sql/stats.sql | 6 ++ src/tools/pgindent/typedefs.list | 2 + 12 files changed, 289 insertions(+), 29 deletions(-) 9.9% doc/src/sgml/ 8.6% src/backend/access/transam/ 34.7% src/backend/utils/activity/ 27.6% src/backend/utils/adt/ 4.9% src/include/catalog/ 6.6% src/include/ 3.9% src/test/regress/expected/ 3.0% src/test/regress/sql/ diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index fbdd6ce574..ec253414a7 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8433,7 +8433,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; measure the overhead of timing on your system. 
I/O timing information is displayed in <link linkend="monitoring-pg-stat-wal-view"> - <structname>pg_stat_wal</structname></link>. + <structname>pg_stat_wal</structname></link> and in the output of the + <link linkend="pg-stat-get-backend-wal"> + <function>pg_stat_get_backend_wal()</function></link> function. Only superusers and users with the appropriate <literal>SET</literal> privilege can change this setting. </para> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index d0d176cc54..84a2d09b76 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -4811,6 +4811,25 @@ description | Waiting for a newly initialized WAL file to reach durable storage </para></entry> </row> + <row> + <entry id="pg-stat-get-backend-wal" role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_stat_get_backend_wal</primary> + </indexterm> + <function>pg_stat_get_backend_wal</function> ( <type>integer</type> ) + <returnvalue>record</returnvalue> + </para> + <para> + Returns WAL statistics about the backend with the specified + process ID. The output fields are exactly the same as the ones in the + <structname>pg_stat_wal</structname> view. + </para> + <para> + The function does not return WAL statistics for the checkpointer, + the background writer, the startup process and the autovacuum launcher. 
+ </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index b9ea92a542..513e26aa4b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2058,6 +2058,10 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) XLogWrite(WriteRqst, tli, false); LWLockRelease(WALWriteLock); PendingWalStats.wal_buffers_full++; + + if (pgstat_tracks_backend_bktype(MyBackendType)) + PendingBackendWalStats.wal_buffers_full++; + TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE(); } /* Re-acquire WALBufMappingLock and retry */ @@ -2426,11 +2430,14 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) Size nleft; ssize_t written; instr_time start; + instr_time end; /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; nbytes = npages * (Size) XLOG_BLCKSZ; nleft = nbytes; + /* keep compiler quiet */ + INSTR_TIME_SET_ZERO(end); do { errno = 0; @@ -2451,14 +2458,26 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) */ if (track_wal_io_timing) { - instr_time end; - INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start); } PendingWalStats.wal_write++; + if (pgstat_tracks_backend_bktype(MyBackendType)) + { + /* + * We are inside a critical section, so we can't use + * pgstat_prep_pending_entry() and we rely on + * PendingBackendWalStats instead. 
+ */ + PendingBackendWalStats.wal_write++; + + if (track_wal_io_timing) + INSTR_TIME_ACCUM_DIFF(PendingBackendWalStats.wal_write_time, + end, start); + } + if (written <= 0) { char xlogfname[MAXFNAMELEN]; @@ -8684,8 +8703,11 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) { char *msg = NULL; instr_time start; + instr_time end; Assert(tli != 0); + /* keep compiler quiet */ + INSTR_TIME_SET_ZERO(end); /* * Quick exit if fsync is disabled or write() has already synced the WAL @@ -8751,13 +8773,19 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) */ if (track_wal_io_timing) { - instr_time end; - INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start); } PendingWalStats.wal_sync++; + + if (pgstat_tracks_backend_bktype(MyBackendType)) + { + PendingBackendWalStats.wal_sync++; + + if (track_wal_io_timing) + INSTR_TIME_ACCUM_DIFF(PendingBackendWalStats.wal_sync_time, end, start); + } } /* diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 2bdaa07828..28fb7a38d7 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -24,6 +24,16 @@ #include "utils/pgstat_internal.h" +PgStat_PendingWalStats PendingBackendWalStats = {0}; + +/* + * WAL usage counters saved from pgWalUsage at the previous call to + * pgstat_report_wal(). This is used to calculate how much WAL usage + * happens between pgstat_report_wal() calls, by subtracting + * the previous counters from the current ones. + */ +static WalUsage prevBackendWalUsage; + /* * Returns statistics of a backend by proc number. */ @@ -80,6 +90,80 @@ pgstat_flush_io_entry(PgStat_EntryRef *entry_ref, bool nowait, bool need_lock) pgstat_unlock_entry(entry_ref); } +/* + * To determine whether any WAL activity has occurred since last time, not + * only the number of generated WAL records but also the numbers of WAL + * writes and syncs need to be checked. 
Because even transaction that + * generates no WAL records can write or sync WAL data when flushing the + * data pages. + */ +static bool +pgstat_backend_wal_have_pending(void) +{ + return pgWalUsage.wal_records != prevBackendWalUsage.wal_records || + PendingBackendWalStats.wal_write != 0 || + PendingBackendWalStats.wal_sync != 0; +} + +/* + * Flush out locally pending backend WAL statistics. + */ +static void +pgstat_flush_wal_entry(PgStat_EntryRef *entry_ref, bool nowait, bool need_lock) +{ + PgStatShared_Backend *shbackendent; + PgStat_WalCounters *bktype_shstats; + WalUsage wal_usage_diff = {0}; + + /* + * This function can be called even if nothing at all has happened. Avoid + * taking lock for nothing in that case. + */ + if (!pgstat_backend_wal_have_pending()) + return; + + if (need_lock && !pgstat_lock_entry(entry_ref, nowait)) + return; + + shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; + bktype_shstats = &shbackendent->stats.wal_stats; + + /* + * We don't update the WAL usage portion of the local WalStats elsewhere. + * Calculate how much WAL usage counters were increased by subtracting the + * previous counters from the current ones. + */ + WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevBackendWalUsage); + +#define WALSTAT_ACC(fld, var_to_add) \ + (bktype_shstats->fld += var_to_add.fld) +#define WALSTAT_ACC_INSTR_TIME(fld) \ + (bktype_shstats->fld += INSTR_TIME_GET_MICROSEC(PendingBackendWalStats.fld)) + WALSTAT_ACC(wal_buffers_full, PendingBackendWalStats); + WALSTAT_ACC(wal_write, PendingBackendWalStats); + WALSTAT_ACC(wal_sync, PendingBackendWalStats); + WALSTAT_ACC(wal_records, wal_usage_diff); + WALSTAT_ACC(wal_fpi, wal_usage_diff); + WALSTAT_ACC(wal_bytes, wal_usage_diff); + WALSTAT_ACC_INSTR_TIME(wal_write_time); + WALSTAT_ACC_INSTR_TIME(wal_sync_time); +#undef WALSTAT_ACC_INSTR_TIME +#undef WALSTAT_ACC + + /* + * Save the current counters for the subsequent calculation of WAL usage. 
+ */ + prevBackendWalUsage = pgWalUsage; + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingBackendWalStats, 0, sizeof(PendingWalStats)); + + if (need_lock) + pgstat_unlock_entry(entry_ref); +} + /* * Flush out locally pending backend statistics * @@ -97,6 +181,9 @@ pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) /* IO stats */ pgstat_flush_io_entry(entry_ref, nowait, false); + /* WAL stats */ + pgstat_flush_wal_entry(entry_ref, nowait, false); + pgstat_unlock_entry(entry_ref); return true; @@ -119,6 +206,23 @@ pgstat_backend_flush_io(bool nowait) pgstat_flush_io_entry(entry_ref, nowait, true); } +/* + * Simpler wrapper of pgstat_flush_wal_entry() + */ +void +pgstat_backend_flush_wal(bool nowait) +{ + PgStat_EntryRef *entry_ref; + + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return; + + entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, + MyProcNumber, false, NULL); + + pgstat_flush_wal_entry(entry_ref, nowait, true); +} + /* * Create backend statistics entry for proc number. */ @@ -138,10 +242,12 @@ pgstat_create_backend(ProcNumber procnum) * e.g. if we previously used this proc number. */ memset(&shstatent->stats, 0, sizeof(shstatent->stats)); + + prevBackendWalUsage = pgWalUsage; } /* - * Find or create a local PgStat_BackendPending entry for proc number. + * Find or create a local PgStat_BackendPendingIO entry for proc number. 
*/ PgStat_BackendPending * pgstat_prep_backend_pending(ProcNumber procnum) diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index 18fa6b2936..160ce99a4c 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -55,6 +55,8 @@ pgstat_report_wal(bool force) /* flush wal stats */ pgstat_flush_wal(nowait); + pgstat_backend_flush_wal(nowait); + /* flush IO stats */ pgstat_flush_io(nowait); } @@ -117,9 +119,9 @@ pgstat_wal_flush_cb(bool nowait) return true; #define WALSTAT_ACC(fld, var_to_add) \ - (stats_shmem->stats.fld += var_to_add.fld) + (stats_shmem->stats.wal_counters.fld += var_to_add.fld) #define WALSTAT_ACC_INSTR_TIME(fld) \ - (stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld)) + (stats_shmem->stats.wal_counters.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld)) WALSTAT_ACC(wal_records, wal_usage_diff); WALSTAT_ACC(wal_fpi, wal_usage_diff); WALSTAT_ACC(wal_bytes, wal_usage_diff); diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index cd3baed472..593f04dc2a 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1562,11 +1562,12 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) /* * pg_stat_wal_build_tuple * - * Helper routine for pg_stat_get_wal() returning one tuple based on the contents - * of wal_stats. + * Helper routine for pg_stat_get_wal() and pg_stat_get_backend_wal() returning + * one tuple based on the contents of wal_counters. 
*/ static Datum -pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) +pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters, + TimestampTz stat_reset_timestamp) { #define PG_STAT_GET_WAL_COLS 9 TupleDesc tupdesc; @@ -1598,26 +1599,26 @@ pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) BlessTupleDesc(tupdesc); /* Fill values and NULLs */ - values[0] = Int64GetDatum(wal_stats.wal_records); - values[1] = Int64GetDatum(wal_stats.wal_fpi); + values[0] = Int64GetDatum(wal_counters.wal_records); + values[1] = Int64GetDatum(wal_counters.wal_fpi); /* Convert to numeric. */ - snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats.wal_bytes); + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_counters.wal_bytes); values[2] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), Int32GetDatum(-1)); - values[3] = Int64GetDatum(wal_stats.wal_buffers_full); - values[4] = Int64GetDatum(wal_stats.wal_write); - values[5] = Int64GetDatum(wal_stats.wal_sync); + values[3] = Int64GetDatum(wal_counters.wal_buffers_full); + values[4] = Int64GetDatum(wal_counters.wal_write); + values[5] = Int64GetDatum(wal_counters.wal_sync); /* Convert counters from microsec to millisec for display */ - values[6] = Float8GetDatum(((double) wal_stats.wal_write_time) / 1000.0); - values[7] = Float8GetDatum(((double) wal_stats.wal_sync_time) / 1000.0); + values[6] = Float8GetDatum(((double) wal_counters.wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_counters.wal_sync_time) / 1000.0); - if (wal_stats.stat_reset_timestamp != 0) - values[8] = TimestampTzGetDatum(wal_stats.stat_reset_timestamp); + if (stat_reset_timestamp != 0) + values[8] = TimestampTzGetDatum(stat_reset_timestamp); else nulls[8] = true; @@ -1625,6 +1626,55 @@ pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * Returns WAL statistics for a backend with given PID. 
+ */ +Datum +pg_stat_get_backend_wal(PG_FUNCTION_ARGS) +{ + int pid; + PGPROC *proc; + ProcNumber procNumber; + PgStat_Backend *backend_stats; + PgStat_WalCounters bktype_stats; + PgBackendStatus *beentry; + + pid = PG_GETARG_INT32(0); + proc = BackendPidGetProc(pid); + + /* + * This could be an auxiliary process but these do not report backend + * statistics due to pgstat_tracks_backend_bktype(), so there is no need + * for an extra call to AuxiliaryPidGetProc(). + */ + if (!proc) + PG_RETURN_NULL(); + + procNumber = GetNumberFromPGProc(proc); + + beentry = pgstat_get_beentry_by_proc_number(procNumber); + if (!beentry) + PG_RETURN_NULL(); + + backend_stats = pgstat_fetch_stat_backend(procNumber); + if (!backend_stats) + PG_RETURN_NULL(); + + /* if PID does not match, leave */ + if (beentry->st_procpid != pid) + PG_RETURN_NULL(); + + /* backend may be gone, so recheck in case */ + if (beentry->st_backendType == B_INVALID) + PG_RETURN_NULL(); + + bktype_stats = backend_stats->wal_stats; + + /* save tuples with data from this PgStat_BktypeIO */ + return (pg_stat_wal_build_tuple(bktype_stats, backend_stats->stat_reset_timestamp)); +} + + /* * Returns statistics of WAL activity */ @@ -1636,7 +1686,7 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) /* Get statistics about WAL activity */ wal_stats = pgstat_fetch_stat_wal(); - return (pg_stat_wal_build_tuple(*wal_stats)); + return (pg_stat_wal_build_tuple(wal_stats->wal_counters, wal_stats->stat_reset_timestamp)); } /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b37e8a6f88..72a5dae4b1 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5929,6 +5929,13 @@ proargmodes => '{o,o,o,o,o,o,o,o,o}', proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}', prosrc => 'pg_stat_get_wal' }, +{ oid => '8037', descr => 'statistics: backend WAL activity', + proname => 'pg_stat_get_backend_wal', 
provolatile => 'v', + proparallel => 'r', prorettype => 'record', proargtypes => 'int4', + proallargtypes => '{int4,int8,int8,numeric,int8,int8,int8,float8,float8,timestamptz}', + proargmodes => '{i,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_pid,wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}', + prosrc => 'pg_stat_get_backend_wal' }, { oid => '6248', descr => 'statistics: information about WAL prefetching', proname => 'pg_stat_get_recovery_prefetch', prorows => '1', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => '', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 6631bd2d73..045877c5a8 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -378,12 +378,6 @@ typedef struct PgStat_IO /* Backend statistics store the same amount of IO data as PGSTAT_KIND_IO */ typedef PgStat_PendingIO PgStat_BackendPendingIO; -typedef struct PgStat_Backend -{ - TimestampTz stat_reset_timestamp; - PgStat_BktypeIO io_stats; -} PgStat_Backend; - typedef struct PgStat_StatDBEntry { PgStat_Counter xact_commit; @@ -495,7 +489,7 @@ typedef struct PgStat_StatTabEntry PgStat_Counter autoanalyze_count; } PgStat_StatTabEntry; -typedef struct PgStat_WalStats +typedef struct PgStat_WalCounters { PgStat_Counter wal_records; PgStat_Counter wal_fpi; @@ -505,6 +499,11 @@ typedef struct PgStat_WalStats PgStat_Counter wal_sync; PgStat_Counter wal_write_time; PgStat_Counter wal_sync_time; +} PgStat_WalCounters; + +typedef struct PgStat_WalStats +{ + PgStat_WalCounters wal_counters; TimestampTz stat_reset_timestamp; } PgStat_WalStats; @@ -523,9 +522,21 @@ typedef struct PgStat_PendingWalStats instr_time wal_sync_time; } PgStat_PendingWalStats; +typedef struct PgStat_Backend +{ + TimestampTz stat_reset_timestamp; + PgStat_BktypeIO io_stats; + PgStat_WalCounters wal_stats; +} PgStat_Backend; + typedef struct PgStat_BackendPending { PgStat_BackendPendingIO pending_io; + + /* + * We are not 
creating one member for PgStat_PendingWalStats. See the + * comment above the PendingBackendWalStats definition as to why. + */ } PgStat_BackendPending; /* @@ -857,5 +868,17 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause; /* updated directly by backends and background processes */ extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats; +/* + * Variables in pgstat_backend.c + */ + +/* updated directly by backends and background processes */ + +/* + * WAL pending statistics are incremented inside a critical section + * (see XLogWrite()), so we can't use pgstat_prep_pending_entry() and we rely on + * PendingBackendWalStats instead. + */ +extern PGDLLIMPORT PgStat_PendingWalStats PendingBackendWalStats; #endif /* PGSTAT_H */ diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 320cd5a842..06681b9102 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -614,6 +614,7 @@ extern void pgstat_archiver_snapshot_cb(void); */ extern void pgstat_backend_flush_io(bool nowait); +extern void pgstat_backend_flush_wal(bool nowait); extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); extern bool pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index a0317b7208..cc01fdf274 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -832,6 +832,8 @@ SELECT sessions > :db_stat_sessions FROM pg_stat_database WHERE datname = (SELEC SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset -- Test pg_stat_wal (and make a temp table so our temp schema exists) SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset +-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists) +SELECT wal_bytes AS 
backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset CREATE TEMP TABLE test_stats_temp AS SELECT 17; DROP TABLE test_stats_temp; -- Checkpoint twice: The checkpointer reports stats after reporting completion @@ -851,6 +853,18 @@ SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal; t (1 row) +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid()); + ?column? +---------- + t +(1 row) + -- Test pg_stat_get_backend_idset() and some allied functions. -- In particular, verify that their notion of backend ID matches -- our temp schema index. diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 399c72bbcf..28fe0a1a7d 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -423,6 +423,9 @@ SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset -- Test pg_stat_wal (and make a temp table so our temp schema exists) SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset +-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists) +SELECT wal_bytes AS backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset + CREATE TEMP TABLE test_stats_temp AS SELECT 17; DROP TABLE test_stats_temp; @@ -435,6 +438,9 @@ CHECKPOINT; SELECT num_requested > :rqst_ckpts_before FROM pg_stat_checkpointer; SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal; +SELECT pg_stat_force_next_flush(); +SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid()); + -- Test pg_stat_get_backend_idset() and some allied functions. -- In particular, verify that their notion of backend ID matches -- our temp schema index. 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 1c8516fd63..fe616a5770 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2158,6 +2158,7 @@ PgStat_KindInfo PgStat_LocalState PgStat_PendingDroppedStatsItem PgStat_PendingIO +PgStat_PendingBackendWalStats PgStat_PendingWalStats PgStat_SLRUStats PgStat_ShmemControl @@ -2174,6 +2175,7 @@ PgStat_SubXactStatus PgStat_TableCounts PgStat_TableStatus PgStat_TableXactStatus +PgStat_WalCounters PgStat_WalStats PgXmlErrorContext PgXmlStrictness -- 2.34.1