Hi, On Thu, Jan 09, 2025 at 01:03:15PM +0900, Michael Paquier wrote: > On Wed, Jan 08, 2025 at 11:11:59AM +0000, Bertrand Drouvot wrote: > > Yeah, that's more elegant as it also means that the main callback will not > > change > > (should we add even more stats in the future). Done that way in v2 attached. > > I've put my hands on v2-0002 to begin with something. > > +/* flag bits for different types of statistics to flush */ > +#define PGSTAT_FLUSH_IO (1 << 0) /* Flush I/O statistics */ > +#define PGSTAT_FLUSH_ALL (PGSTAT_FLUSH_IO) > > These are located and used only in pgstat_backend.c. It seems to me > that we'd better declare them in pgstat_internal.h and extend the > existing pgstat_flush_backend() with an argument so as callers can do > what they want. > > + /* Get our own entry_ref if not provided */ > + if (!entry_ref) > + entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, > InvalidOid, > + > MyProcNumber, false, NULL); > > This relates to the previous remark, actually, where I think that it > is cleaner to have pgstat_flush_backend() do pgstat_get_entry_ref(), > same way as HEAD, and just pass down the flags.
I see, so you keep pgstat_flush_backend() calls (with an extra arg) and remove the new "pgstat_backend_flush_io()" function. > This comes at the cost of pgstat_flush_backend_entry() > requiring an extra pgstat_tracks_backend_bktype(), which is not a big > issue, and the patch gets a bit shorter. Yeah, all of the above is fine by me. PFA v3, which is v2 refactored with the changes you proposed above. Regards, -- Bertrand Drouvot PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
>From bcdf4eb91f7ff4beeba50433a173ac2650afe432 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 07:51:27 +0000 Subject: [PATCH v3 1/3] Extract logic filling pg_stat_get_wal()'s tuple into its own routine This commit adds pg_stat_wal_build_tuple(), a helper routine for pg_stat_get_wal(), that fills its tuple based on the contents of PgStat_WalStats. This will be used in a follow-up commit that uses the same structures as pg_stat_wal for reporting, but for the PGSTAT_KIND_BACKEND statistics kind. --- src/backend/utils/adt/pgstatfuncs.c | 56 ++++++++++++++++++----------- 1 file changed, 36 insertions(+), 20 deletions(-) 100.0% src/backend/utils/adt/ diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 3245f3a8d8..7309f06993 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1560,20 +1560,22 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) } /* - * Returns statistics of WAL activity + * pg_stat_wal_build_tuple + * + * Helper routine for pg_stat_get_wal() returning one tuple based on the contents + * of wal_stats. 
*/ -Datum -pg_stat_get_wal(PG_FUNCTION_ARGS) +static Datum +pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) { -#define PG_STAT_GET_WAL_COLS 9 +#define PG_STAT_WAL_COLS 9 TupleDesc tupdesc; - Datum values[PG_STAT_GET_WAL_COLS] = {0}; - bool nulls[PG_STAT_GET_WAL_COLS] = {0}; + Datum values[PG_STAT_WAL_COLS] = {0}; + bool nulls[PG_STAT_WAL_COLS] = {0}; char buf[256]; - PgStat_WalStats *wal_stats; /* Initialise attributes information in the tuple descriptor */ - tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_WAL_COLS); + tupdesc = CreateTemplateTupleDesc(PG_STAT_WAL_COLS); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "wal_records", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wal_fpi", @@ -1595,34 +1597,48 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) BlessTupleDesc(tupdesc); - /* Get statistics about WAL activity */ - wal_stats = pgstat_fetch_stat_wal(); - /* Fill values and NULLs */ - values[0] = Int64GetDatum(wal_stats->wal_records); - values[1] = Int64GetDatum(wal_stats->wal_fpi); + values[0] = Int64GetDatum(wal_stats.wal_records); + values[1] = Int64GetDatum(wal_stats.wal_fpi); /* Convert to numeric. 
*/ - snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats->wal_bytes); + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats.wal_bytes); values[2] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), Int32GetDatum(-1)); - values[3] = Int64GetDatum(wal_stats->wal_buffers_full); - values[4] = Int64GetDatum(wal_stats->wal_write); - values[5] = Int64GetDatum(wal_stats->wal_sync); + values[3] = Int64GetDatum(wal_stats.wal_buffers_full); + values[4] = Int64GetDatum(wal_stats.wal_write); + values[5] = Int64GetDatum(wal_stats.wal_sync); /* Convert counters from microsec to millisec for display */ - values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 1000.0); - values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 1000.0); + values[6] = Float8GetDatum(((double) wal_stats.wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_stats.wal_sync_time) / 1000.0); - values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); + if (wal_stats.stat_reset_timestamp != 0) + values[8] = TimestampTzGetDatum(wal_stats.stat_reset_timestamp); + else + nulls[8] = true; /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * Returns statistics of WAL activity + */ +Datum +pg_stat_get_wal(PG_FUNCTION_ARGS) +{ + PgStat_WalStats *wal_stats; + + /* Get statistics about WAL activity */ + wal_stats = pgstat_fetch_stat_wal(); + + return (pg_stat_wal_build_tuple(*wal_stats)); +} + /* * Returns statistics of SLRU caches. */ -- 2.34.1
>From 9173b59075759fdd88f29bcf2b555e86c823704d Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 08:44:29 +0000 Subject: [PATCH v3 2/3] PGSTAT_KIND_BACKEND code refactoring This commit refactors some code related to per backend statistics. It makes the code more generic or more IO statistics focused as it will be used in a follow-up commit that will introduce per backend WAL statistics. --- src/backend/utils/activity/pgstat.c | 2 +- src/backend/utils/activity/pgstat_backend.c | 70 ++++++++++++++------ src/backend/utils/activity/pgstat_io.c | 8 +-- src/backend/utils/activity/pgstat_relation.c | 4 +- src/backend/utils/adt/pgstatfuncs.c | 2 +- src/include/pgstat.h | 6 +- src/include/utils/pgstat_internal.h | 7 +- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 69 insertions(+), 31 deletions(-) 79.4% src/backend/utils/activity/ 12.9% src/include/utils/ 4.5% src/include/ 3.0% src/ diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 16a03b8ce1..34520535d5 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -370,7 +370,7 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_size = sizeof(PgStatShared_Backend), .shared_data_off = offsetof(PgStatShared_Backend, stats), .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), - .pending_size = sizeof(PgStat_BackendPendingIO), + .pending_size = sizeof(PgStat_BackendPending), .flush_pending_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 1f91bfef0a..ea49208b80 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -39,23 +39,21 @@ pgstat_fetch_stat_backend(ProcNumber procNumber) } /* - * Flush out locally pending backend statistics - * - * If
no stats have been recorded, this function returns false. + * Flush out locally pending backend IO statistics. Locking is managed + * by the caller. */ -bool -pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +static void +pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref) { - PgStatShared_Backend *shbackendioent; - PgStat_BackendPendingIO *pendingent; + PgStatShared_Backend *shbackendent; + PgStat_BackendPending *pendingent; PgStat_BktypeIO *bktype_shstats; + PgStat_BackendPendingIO *pending_io; - if (!pgstat_lock_entry(entry_ref, nowait)) - return false; - - shbackendioent = (PgStatShared_Backend *) entry_ref->shared_stats; - bktype_shstats = &shbackendioent->stats.stats; - pendingent = (PgStat_BackendPendingIO *) entry_ref->pending; + shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; + pendingent = (PgStat_BackendPending *) entry_ref->pending; + bktype_shstats = &shbackendent->stats.io_stats; + pending_io = &pendingent->pending_io; for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) { @@ -66,15 +64,33 @@ pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) instr_time time; bktype_shstats->counts[io_object][io_context][io_op] += - pendingent->counts[io_object][io_context][io_op]; + pending_io->counts[io_object][io_context][io_op]; - time = pendingent->pending_times[io_object][io_context][io_op]; + time = pending_io->pending_times[io_object][io_context][io_op]; bktype_shstats->times[io_object][io_context][io_op] += INSTR_TIME_GET_MICROSEC(time); } } } +} + +/* + * Wrapper routine to flush backend statistics. 
+ */ +static bool +pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait, + bits32 flags) +{ + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return false; + + if (!pgstat_lock_entry(entry_ref, nowait)) + return false; + + /* Flush requested statistics */ + if (flags & PGSTAT_BACKEND_FLUSH_IO) + pgstat_flush_backend_entry_io(entry_ref); pgstat_unlock_entry(entry_ref); @@ -82,10 +98,23 @@ pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) } /* - * Simpler wrapper of pgstat_backend_flush_cb() + * Callback to flush out locally pending backend statistics. + * + * If no stats have been recorded, this function returns false. + */ +bool +pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +{ + return pgstat_flush_backend_entry(entry_ref, nowait, PGSTAT_BACKEND_FLUSH_ALL); +} + +/* + * Flush out locally pending backend statistics + * + * "flags" parameter controls which statistics to flush. */ void -pgstat_flush_backend(bool nowait) +pgstat_flush_backend(bool nowait, bits32 flags) { PgStat_EntryRef *entry_ref; @@ -94,7 +123,8 @@ pgstat_flush_backend(bool nowait) entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber, false, NULL); - (void) pgstat_backend_flush_cb(entry_ref, nowait); + + (void) pgstat_flush_backend_entry(entry_ref, nowait, flags); } /* @@ -119,9 +149,9 @@ pgstat_create_backend(ProcNumber procnum) } /* - * Find or create a local PgStat_BackendPendingIO entry for proc number. + * Find or create a local PgStat_BackendPending entry for proc number. 
*/ -PgStat_BackendPendingIO * +PgStat_BackendPending * pgstat_prep_backend_pending(ProcNumber procnum) { PgStat_EntryRef *entry_ref; diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index f9a1f91dba..a7445995d3 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -81,10 +81,10 @@ pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint3 if (pgstat_tracks_backend_bktype(MyBackendType)) { - PgStat_PendingIO *entry_ref; + PgStat_BackendPending *entry_ref; entry_ref = pgstat_prep_backend_pending(MyProcNumber); - entry_ref->counts[io_object][io_context][io_op] += cnt; + entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt; } PendingIOStats.counts[io_object][io_context][io_op] += cnt; @@ -151,10 +151,10 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, if (pgstat_tracks_backend_bktype(MyBackendType)) { - PgStat_PendingIO *entry_ref; + PgStat_BackendPending *entry_ref; entry_ref = pgstat_prep_backend_pending(MyProcNumber); - INSTR_TIME_ADD(entry_ref->pending_times[io_object][io_context][io_op], + INSTR_TIME_ADD(entry_ref->pending_io.pending_times[io_object][io_context][io_op], io_time); } } diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 2cc304f881..09247ba097 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -264,7 +264,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, * VACUUM command has processed all tables and committed. 
*/ pgstat_flush_io(false); - pgstat_flush_backend(false); + pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); } /* @@ -351,7 +351,7 @@ pgstat_report_analyze(Relation rel, /* see pgstat_report_vacuum() */ pgstat_flush_io(false); - pgstat_flush_backend(false); + pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); } /* diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 7309f06993..8a4340e977 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1544,7 +1544,7 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) if (bktype == B_INVALID) return (Datum) 0; - bktype_stats = &backend_stats->stats; + bktype_stats = &backend_stats->io_stats; /* * In Assert builds, we can afford an extra loop through all of the diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 0d8427f27d..6631bd2d73 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -381,7 +381,7 @@ typedef PgStat_PendingIO PgStat_BackendPendingIO; typedef struct PgStat_Backend { TimestampTz stat_reset_timestamp; - PgStat_BktypeIO stats; + PgStat_BktypeIO io_stats; } PgStat_Backend; typedef struct PgStat_StatDBEntry @@ -523,6 +523,10 @@ typedef struct PgStat_PendingWalStats instr_time wal_sync_time; } PgStat_PendingWalStats; +typedef struct PgStat_BackendPending +{ + PgStat_BackendPendingIO pending_io; +} PgStat_BackendPending; /* * Functions in pgstat.c diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 52eb008710..4bb8e5c53a 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -613,9 +613,12 @@ extern void pgstat_archiver_snapshot_cb(void); * Functions in pgstat_backend.c */ -extern void pgstat_flush_backend(bool nowait); +/* flags for pgstat_flush_backend() */ +#define PGSTAT_BACKEND_FLUSH_IO (1 << 0) /* Flush I/O statistics */ +#define PGSTAT_BACKEND_FLUSH_ALL (PGSTAT_BACKEND_FLUSH_IO) -extern PgStat_BackendPendingIO 
*pgstat_prep_backend_pending(ProcNumber procnum); +extern void pgstat_flush_backend(bool nowait, bits32 flags); +extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); extern bool pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9f83ecf181..f15526236a 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2140,6 +2140,7 @@ PgStatShared_Subscription PgStatShared_Wal PgStat_ArchiverStats PgStat_Backend +PgStat_BackendPending PgStat_BackendPendingIO PgStat_BackendSubEntry PgStat_BgWriterStats -- 2.34.1
>From b024c89167be988f31dee8330546ed96f43384d5 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Mon, 6 Jan 2025 10:00:00 +0000 Subject: [PATCH v3 3/3] per backend WAL statistics Now that commit 9aea73fc61 added backend-level statistics to pgstats (and per backend IO statistics) we can more easily add per backend statistics. This commit adds per backend WAL statistics using the same layer as pg_stat_wal, except that it is now possible to know how much WAL activity is happening in each backend rather than an overall aggregate of all the activity. A function called pg_stat_get_backend_wal() is added to access this data depending on the PID of a backend. The same limitation as in 9aea73fc61 persists, meaning that Auxiliary processes are not included in this set of statistics. XXX: bump catalog version --- doc/src/sgml/config.sgml | 4 +- doc/src/sgml/monitoring.sgml | 19 +++++ src/backend/access/transam/xlog.c | 36 ++++++++- src/backend/utils/activity/pgstat_backend.c | 86 ++++++++++++++++++++- src/backend/utils/activity/pgstat_wal.c | 6 +- src/backend/utils/adt/pgstatfuncs.c | 78 +++++++++++++++---- src/include/catalog/pg_proc.dat | 7 ++ src/include/pgstat.h | 37 +++++++-- src/include/utils/pgstat_internal.h | 3 +- src/test/regress/expected/stats.out | 14 ++++ src/test/regress/sql/stats.sql | 6 ++ src/tools/pgindent/typedefs.list | 2 + 12 files changed, 268 insertions(+), 30 deletions(-) 10.1% doc/src/sgml/ 8.8% src/backend/access/transam/ 31.5% src/backend/utils/activity/ 28.3% src/backend/utils/adt/ 5.0% src/include/catalog/ 8.4% src/include/ 4.0% src/test/regress/expected/ 3.0% src/test/regress/sql/ diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 8683f0bdf5..8e8478dcb1 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8433,7 +8433,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; measure the overhead of timing on your system. 
I/O timing information is displayed in <link linkend="monitoring-pg-stat-wal-view"> - <structname>pg_stat_wal</structname></link>. + <structname>pg_stat_wal</structname></link> and in the output of the + <link linkend="pg-stat-get-backend-wal"> + <function>pg_stat_get_backend_wal()</function></link> function. Only superusers and users with the appropriate <literal>SET</literal> privilege can change this setting. </para> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index d0d176cc54..84a2d09b76 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -4811,6 +4811,25 @@ description | Waiting for a newly initialized WAL file to reach durable storage </para></entry> </row> + <row> + <entry id="pg-stat-get-backend-wal" role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>pg_stat_get_backend_wal</primary> + </indexterm> + <function>pg_stat_get_backend_wal</function> ( <type>integer</type> ) + <returnvalue>record</returnvalue> + </para> + <para> + Returns WAL statistics about the backend with the specified + process ID. The output fields are exactly the same as the ones in the + <structname>pg_stat_wal</structname> view. + </para> + <para> + The function does not return WAL statistics for the checkpointer, + the background writer, the startup process and the autovacuum launcher. 
+ </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index bf3dbda901..0ba9fcb277 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2058,6 +2058,10 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic) XLogWrite(WriteRqst, tli, false); LWLockRelease(WALWriteLock); PendingWalStats.wal_buffers_full++; + + if (pgstat_tracks_backend_bktype(MyBackendType)) + PendingBackendWalStats.wal_buffers_full++; + TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE(); } /* Re-acquire WALBufMappingLock and retry */ @@ -2426,11 +2430,14 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) Size nleft; ssize_t written; instr_time start; + instr_time end; /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; nbytes = npages * (Size) XLOG_BLCKSZ; nleft = nbytes; + /* keep compiler quiet */ + INSTR_TIME_SET_ZERO(end); do { errno = 0; @@ -2451,14 +2458,26 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) */ if (track_wal_io_timing) { - instr_time end; - INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start); } PendingWalStats.wal_write++; + if (pgstat_tracks_backend_bktype(MyBackendType)) + { + /* + * We are inside a critical section, so we can't use + * pgstat_prep_pending_entry() and we rely on + * PendingBackendWalStats instead. 
+ */ + PendingBackendWalStats.wal_write++; + + if (track_wal_io_timing) + INSTR_TIME_ACCUM_DIFF(PendingBackendWalStats.wal_write_time, + end, start); + } + if (written <= 0) { char xlogfname[MAXFNAMELEN]; @@ -8684,8 +8703,11 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) { char *msg = NULL; instr_time start; + instr_time end; Assert(tli != 0); + /* keep compiler quiet */ + INSTR_TIME_SET_ZERO(end); /* * Quick exit if fsync is disabled or write() has already synced the WAL @@ -8751,13 +8773,19 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli) */ if (track_wal_io_timing) { - instr_time end; - INSTR_TIME_SET_CURRENT(end); INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start); } PendingWalStats.wal_sync++; + + if (pgstat_tracks_backend_bktype(MyBackendType)) + { + PendingBackendWalStats.wal_sync++; + + if (track_wal_io_timing) + INSTR_TIME_ACCUM_DIFF(PendingBackendWalStats.wal_sync_time, end, start); + } } /* diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index ea49208b80..c66cbf70b6 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -24,6 +24,16 @@ #include "utils/pgstat_internal.h" +PgStat_PendingWalStats PendingBackendWalStats = {0}; + +/* + * WAL usage counters saved from pgWalUsage at the previous call to + * pgstat_report_wal(). This is used to calculate how much WAL usage + * happens between pgstat_report_wal() calls, by subtracting + * the previous counters from the current ones. + */ +static WalUsage prevBackendWalUsage; + /* * Returns statistics of a backend by proc number. */ @@ -75,6 +85,75 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref) } } +/* + * To determine whether any WAL activity has occurred since last time, not + * only the number of generated WAL records but also the numbers of WAL + * writes and syncs need to be checked. 
Even a transaction that + * generates no WAL records can write or sync WAL data when flushing the + * data pages. + */ +static bool +pgstat_backend_wal_have_pending(void) +{ + return pgWalUsage.wal_records != prevBackendWalUsage.wal_records || + PendingBackendWalStats.wal_write != 0 || + PendingBackendWalStats.wal_sync != 0; +} + +/* + * Flush out locally pending backend WAL statistics. Locking is managed + * by the caller. + */ +static void +pgstat_flush_backend_entry_wal(PgStat_EntryRef *entry_ref) +{ + PgStatShared_Backend *shbackendent; + PgStat_WalCounters *bktype_shstats; + WalUsage wal_usage_diff = {0}; + + /* + * This function can be called even if nothing at all has happened. Return + * early in that case; the entry lock is already held by the caller. + */ + if (!pgstat_backend_wal_have_pending()) + return; + + shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; + bktype_shstats = &shbackendent->stats.wal_stats; + + /* + * We don't update the WAL usage portion of the local WalStats elsewhere. + * Calculate how much WAL usage counters were increased by subtracting the + * previous counters from the current ones. + */ + WalUsageAccumDiff(&wal_usage_diff, &pgWalUsage, &prevBackendWalUsage); + +#define WALSTAT_ACC(fld, var_to_add) \ + (bktype_shstats->fld += var_to_add.fld) +#define WALSTAT_ACC_INSTR_TIME(fld) \ + (bktype_shstats->fld += INSTR_TIME_GET_MICROSEC(PendingBackendWalStats.fld)) + WALSTAT_ACC(wal_buffers_full, PendingBackendWalStats); + WALSTAT_ACC(wal_write, PendingBackendWalStats); + WALSTAT_ACC(wal_sync, PendingBackendWalStats); + WALSTAT_ACC(wal_records, wal_usage_diff); + WALSTAT_ACC(wal_fpi, wal_usage_diff); + WALSTAT_ACC(wal_bytes, wal_usage_diff); + WALSTAT_ACC_INSTR_TIME(wal_write_time); + WALSTAT_ACC_INSTR_TIME(wal_sync_time); +#undef WALSTAT_ACC_INSTR_TIME +#undef WALSTAT_ACC + + /* + * Save the current counters for the subsequent calculation of WAL usage.
+ */ + prevBackendWalUsage = pgWalUsage; + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingBackendWalStats, 0, sizeof(PendingWalStats)); +} + /* * Wrapper routine to flush backend statistics. */ @@ -92,6 +171,9 @@ pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait, if (flags & PGSTAT_BACKEND_FLUSH_IO) pgstat_flush_backend_entry_io(entry_ref); + if (flags & PGSTAT_BACKEND_FLUSH_WAL) + pgstat_flush_backend_entry_wal(entry_ref); + pgstat_unlock_entry(entry_ref); return true; @@ -146,10 +228,12 @@ pgstat_create_backend(ProcNumber procnum) * e.g. if we previously used this proc number. */ memset(&shstatent->stats, 0, sizeof(shstatent->stats)); + + prevBackendWalUsage = pgWalUsage; } /* - * Find or create a local PgStat_BackendPending entry for proc number. + * Find or create a local PgStat_BackendPendingIO entry for proc number. */ PgStat_BackendPending * pgstat_prep_backend_pending(ProcNumber procnum) diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c index 18fa6b2936..3362764226 100644 --- a/src/backend/utils/activity/pgstat_wal.c +++ b/src/backend/utils/activity/pgstat_wal.c @@ -55,6 +55,8 @@ pgstat_report_wal(bool force) /* flush wal stats */ pgstat_flush_wal(nowait); + pgstat_flush_backend(nowait, PGSTAT_BACKEND_FLUSH_WAL); + /* flush IO stats */ pgstat_flush_io(nowait); } @@ -117,9 +119,9 @@ pgstat_wal_flush_cb(bool nowait) return true; #define WALSTAT_ACC(fld, var_to_add) \ - (stats_shmem->stats.fld += var_to_add.fld) + (stats_shmem->stats.wal_counters.fld += var_to_add.fld) #define WALSTAT_ACC_INSTR_TIME(fld) \ - (stats_shmem->stats.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld)) + (stats_shmem->stats.wal_counters.fld += INSTR_TIME_GET_MICROSEC(PendingWalStats.fld)) WALSTAT_ACC(wal_records, wal_usage_diff); WALSTAT_ACC(wal_fpi, wal_usage_diff); WALSTAT_ACC(wal_bytes, wal_usage_diff); diff --git a/src/backend/utils/adt/pgstatfuncs.c 
b/src/backend/utils/adt/pgstatfuncs.c index 8a4340e977..8d251dfcca 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1562,11 +1562,12 @@ pg_stat_get_backend_io(PG_FUNCTION_ARGS) /* * pg_stat_wal_build_tuple * - * Helper routine for pg_stat_get_wal() returning one tuple based on the contents - * of wal_stats. + * Helper routine for pg_stat_get_wal() and pg_stat_get_backend_wal() returning + * one tuple based on the contents of wal_counters. */ static Datum -pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) +pg_stat_wal_build_tuple(PgStat_WalCounters wal_counters, + TimestampTz stat_reset_timestamp) { #define PG_STAT_WAL_COLS 9 TupleDesc tupdesc; @@ -1598,26 +1599,26 @@ pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) BlessTupleDesc(tupdesc); /* Fill values and NULLs */ - values[0] = Int64GetDatum(wal_stats.wal_records); - values[1] = Int64GetDatum(wal_stats.wal_fpi); + values[0] = Int64GetDatum(wal_counters.wal_records); + values[1] = Int64GetDatum(wal_counters.wal_fpi); /* Convert to numeric. 
*/ - snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats.wal_bytes); + snprintf(buf, sizeof buf, UINT64_FORMAT, wal_counters.wal_bytes); values[2] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), Int32GetDatum(-1)); - values[3] = Int64GetDatum(wal_stats.wal_buffers_full); - values[4] = Int64GetDatum(wal_stats.wal_write); - values[5] = Int64GetDatum(wal_stats.wal_sync); + values[3] = Int64GetDatum(wal_counters.wal_buffers_full); + values[4] = Int64GetDatum(wal_counters.wal_write); + values[5] = Int64GetDatum(wal_counters.wal_sync); /* Convert counters from microsec to millisec for display */ - values[6] = Float8GetDatum(((double) wal_stats.wal_write_time) / 1000.0); - values[7] = Float8GetDatum(((double) wal_stats.wal_sync_time) / 1000.0); + values[6] = Float8GetDatum(((double) wal_counters.wal_write_time) / 1000.0); + values[7] = Float8GetDatum(((double) wal_counters.wal_sync_time) / 1000.0); - if (wal_stats.stat_reset_timestamp != 0) - values[8] = TimestampTzGetDatum(wal_stats.stat_reset_timestamp); + if (stat_reset_timestamp != 0) + values[8] = TimestampTzGetDatum(stat_reset_timestamp); else nulls[8] = true; @@ -1625,6 +1626,55 @@ pg_stat_wal_build_tuple(PgStat_WalStats wal_stats) PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); } +/* + * Returns WAL statistics for a backend with given PID. + */ +Datum +pg_stat_get_backend_wal(PG_FUNCTION_ARGS) +{ + int pid; + PGPROC *proc; + ProcNumber procNumber; + PgStat_Backend *backend_stats; + PgStat_WalCounters bktype_stats; + PgBackendStatus *beentry; + + pid = PG_GETARG_INT32(0); + proc = BackendPidGetProc(pid); + + /* + * This could be an auxiliary process but these do not report backend + * statistics due to pgstat_tracks_backend_bktype(), so there is no need + * for an extra call to AuxiliaryPidGetProc(). 
+ */ + if (!proc) + PG_RETURN_NULL(); + + procNumber = GetNumberFromPGProc(proc); + + beentry = pgstat_get_beentry_by_proc_number(procNumber); + if (!beentry) + PG_RETURN_NULL(); + + backend_stats = pgstat_fetch_stat_backend(procNumber); + if (!backend_stats) + PG_RETURN_NULL(); + + /* if PID does not match, leave */ + if (beentry->st_procpid != pid) + PG_RETURN_NULL(); + + /* backend may be gone, so recheck in case */ + if (beentry->st_backendType == B_INVALID) + PG_RETURN_NULL(); + + bktype_stats = backend_stats->wal_stats; + + /* save tuples with data from this PgStat_WalCounters */ + return (pg_stat_wal_build_tuple(bktype_stats, backend_stats->stat_reset_timestamp)); +} + + /* * Returns statistics of WAL activity */ @@ -1636,7 +1686,7 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) /* Get statistics about WAL activity */ wal_stats = pgstat_fetch_stat_wal(); - return (pg_stat_wal_build_tuple(*wal_stats)); + return (pg_stat_wal_build_tuple(wal_stats->wal_counters, wal_stats->stat_reset_timestamp)); } /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b37e8a6f88..72a5dae4b1 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5929,6 +5929,13 @@ proargmodes => '{o,o,o,o,o,o,o,o,o}', proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}', prosrc => 'pg_stat_get_wal' }, +{ oid => '8037', descr => 'statistics: backend WAL activity', + proname => 'pg_stat_get_backend_wal', provolatile => 'v', + proparallel => 'r', prorettype => 'record', proargtypes => 'int4', + proallargtypes => '{int4,int8,int8,numeric,int8,int8,int8,float8,float8,timestamptz}', + proargmodes => '{i,o,o,o,o,o,o,o,o,o}', + proargnames => '{backend_pid,wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}', + prosrc => 'pg_stat_get_backend_wal' }, { oid => '6248', descr => 'statistics: information about WAL prefetching',
proname => 'pg_stat_get_recovery_prefetch', prorows => '1', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => '', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 6631bd2d73..045877c5a8 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -378,12 +378,6 @@ typedef struct PgStat_IO /* Backend statistics store the same amount of IO data as PGSTAT_KIND_IO */ typedef PgStat_PendingIO PgStat_BackendPendingIO; -typedef struct PgStat_Backend -{ - TimestampTz stat_reset_timestamp; - PgStat_BktypeIO io_stats; -} PgStat_Backend; - typedef struct PgStat_StatDBEntry { PgStat_Counter xact_commit; @@ -495,7 +489,7 @@ typedef struct PgStat_StatTabEntry PgStat_Counter autoanalyze_count; } PgStat_StatTabEntry; -typedef struct PgStat_WalStats +typedef struct PgStat_WalCounters { PgStat_Counter wal_records; PgStat_Counter wal_fpi; @@ -505,6 +499,11 @@ typedef struct PgStat_WalStats PgStat_Counter wal_sync; PgStat_Counter wal_write_time; PgStat_Counter wal_sync_time; +} PgStat_WalCounters; + +typedef struct PgStat_WalStats +{ + PgStat_WalCounters wal_counters; TimestampTz stat_reset_timestamp; } PgStat_WalStats; @@ -523,9 +522,21 @@ typedef struct PgStat_PendingWalStats instr_time wal_sync_time; } PgStat_PendingWalStats; +typedef struct PgStat_Backend +{ + TimestampTz stat_reset_timestamp; + PgStat_BktypeIO io_stats; + PgStat_WalCounters wal_stats; +} PgStat_Backend; + typedef struct PgStat_BackendPending { PgStat_BackendPendingIO pending_io; + + /* + * We are not creating one member for PgStat_PendingWalStats. See the + * comment above the PendingBackendWalStats definition as to why. 
+ */ } PgStat_BackendPending; /* @@ -857,5 +868,17 @@ extern PGDLLIMPORT SessionEndType pgStatSessionEndCause; /* updated directly by backends and background processes */ extern PGDLLIMPORT PgStat_PendingWalStats PendingWalStats; +/* + * Variables in pgstat_backend.c + */ + +/* updated directly by backends and background processes */ + +/* + * WAL pending statistics are incremented inside a critical section + * (see XLogWrite()), so we can't use pgstat_prep_pending_entry() and we rely on + * PendingBackendWalStats instead. + */ +extern PGDLLIMPORT PgStat_PendingWalStats PendingBackendWalStats; #endif /* PGSTAT_H */ diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 4bb8e5c53a..05c3e21500 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -615,7 +615,8 @@ extern void pgstat_archiver_snapshot_cb(void); /* flags for pgstat_flush_backend() */ #define PGSTAT_BACKEND_FLUSH_IO (1 << 0) /* Flush I/O statistics */ -#define PGSTAT_BACKEND_FLUSH_ALL (PGSTAT_BACKEND_FLUSH_IO) +#define PGSTAT_BACKEND_FLUSH_WAL (1 << 1) /* Flush WAL statistics */ +#define PGSTAT_BACKEND_FLUSH_ALL (PGSTAT_BACKEND_FLUSH_IO | PGSTAT_BACKEND_FLUSH_WAL) extern void pgstat_flush_backend(bool nowait, bits32 flags); extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index a0317b7208..cc01fdf274 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -832,6 +832,8 @@ SELECT sessions > :db_stat_sessions FROM pg_stat_database WHERE datname = (SELEC SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset -- Test pg_stat_wal (and make a temp table so our temp schema exists) SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset +-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists) +SELECT wal_bytes AS 
backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset CREATE TEMP TABLE test_stats_temp AS SELECT 17; DROP TABLE test_stats_temp; -- Checkpoint twice: The checkpointer reports stats after reporting completion @@ -851,6 +853,18 @@ SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal; t (1 row) +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid()); + ?column? +---------- + t +(1 row) + -- Test pg_stat_get_backend_idset() and some allied functions. -- In particular, verify that their notion of backend ID matches -- our temp schema index. diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 399c72bbcf..28fe0a1a7d 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -423,6 +423,9 @@ SELECT num_requested AS rqst_ckpts_before FROM pg_stat_checkpointer \gset -- Test pg_stat_wal (and make a temp table so our temp schema exists) SELECT wal_bytes AS wal_bytes_before FROM pg_stat_wal \gset +-- Test pg_stat_get_backend_wal (and make a temp table so our temp schema exists) +SELECT wal_bytes AS backend_wal_bytes_before from pg_stat_get_backend_wal(pg_backend_pid()) \gset + CREATE TEMP TABLE test_stats_temp AS SELECT 17; DROP TABLE test_stats_temp; @@ -435,6 +438,9 @@ CHECKPOINT; SELECT num_requested > :rqst_ckpts_before FROM pg_stat_checkpointer; SELECT wal_bytes > :wal_bytes_before FROM pg_stat_wal; +SELECT pg_stat_force_next_flush(); +SELECT wal_bytes > :backend_wal_bytes_before FROM pg_stat_get_backend_wal(pg_backend_pid()); + -- Test pg_stat_get_backend_idset() and some allied functions. -- In particular, verify that their notion of backend ID matches -- our temp schema index. 
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index f15526236a..b593d8601e 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2159,6 +2159,7 @@ PgStat_KindInfo PgStat_LocalState PgStat_PendingDroppedStatsItem PgStat_PendingIO +PgStat_PendingBackendWalStats PgStat_PendingWalStats PgStat_SLRUStats PgStat_ShmemControl @@ -2175,6 +2176,7 @@ PgStat_SubXactStatus PgStat_TableCounts PgStat_TableStatus PgStat_TableXactStatus +PgStat_WalCounters PgStat_WalStats PgXmlErrorContext PgXmlStrictness -- 2.34.1