Hi, On Mon, Jan 20, 2025 at 09:11:16PM +0900, Michael Paquier wrote: > On Mon, Jan 20, 2025 at 11:10:40AM +0000, Bertrand Drouvot wrote: > > I think that it would be better to make the distinction based on > > "local/static" > > vs "dynamic memory" pending stats instead: I did so in v3 attached, using: > > > > .flush_dynamic_cb(): flushes pending entries tracked in dynamic memory > > .flush_static_cb(): flushes pending stats from static/global variable > > "static" and "dynamic" feel a bit off when used together. For > "static", the stats are from a static state but they can be pushed to > the dshash, as well, which is in dynamic shared memory.
Right, that's another way to find it confusing ;-). But if we keep focus on where the pending stats are stored, it is not. > Hmm. Thinking more about that, I think that we should leave the > existing "flush_pending_cb" alone. For the two others, how about > "have_static_pending_cb" and "flush_static_cb" rather than "fixed"? > It seems important to me to show the relationship between these two. Yeah, that works for me: done in v4 attached. Regards, -- Bertrand Drouvot PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
>From d57755402e9a9f672d2057c7f7ffa3832d402a3b Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Fri, 17 Jan 2025 06:44:47 +0000 Subject: [PATCH v4] Rework per backend pending stats 9aea73fc61d added per backend pending statistics but not all of them are well suited to memory allocation and have to be outside of critical sections. For those (the only current one is I/O statistics but WAL statistics is under discussion), let's rely on a new PendingBackendStats instead. --- src/backend/utils/activity/pgstat.c | 39 ++--- src/backend/utils/activity/pgstat_backend.c | 142 ++++++++++++------- src/backend/utils/activity/pgstat_io.c | 23 +-- src/backend/utils/activity/pgstat_relation.c | 4 +- src/include/pgstat.h | 9 ++ src/include/utils/pgstat_internal.h | 34 +++-- 6 files changed, 144 insertions(+), 107 deletions(-) 76.8% src/backend/utils/activity/ 19.1% src/include/utils/ 4.0% src/include/ diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index 34520535d54..d4fc03b5d39 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -372,7 +372,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_len = sizeof(((PgStatShared_Backend *) 0)->stats), .pending_size = sizeof(PgStat_BackendPending), - .flush_pending_cb = pgstat_backend_flush_cb, + .have_static_pending_cb = pgstat_backend_have_pending_cb, + .flush_static_cb = pgstat_backend_flush_cb, .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, }, @@ -437,8 +438,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_off = offsetof(PgStatShared_IO, stats), .shared_data_len = sizeof(((PgStatShared_IO *) 0)->stats), - .flush_fixed_cb = pgstat_io_flush_cb, - .have_fixed_pending_cb = pgstat_io_have_pending_cb, + .flush_static_cb = pgstat_io_flush_cb, + .have_static_pending_cb = pgstat_io_have_pending_cb, .init_shmem_cb = pgstat_io_init_shmem_cb, .reset_all_cb = pgstat_io_reset_all_cb, .snapshot_cb = pgstat_io_snapshot_cb, @@ -455,8 +456,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_off = offsetof(PgStatShared_SLRU, stats), .shared_data_len = sizeof(((PgStatShared_SLRU *) 0)->stats), - .flush_fixed_cb = pgstat_slru_flush_cb, - .have_fixed_pending_cb = pgstat_slru_have_pending_cb, + .flush_static_cb = pgstat_slru_flush_cb, + .have_static_pending_cb = pgstat_slru_have_pending_cb, .init_shmem_cb = pgstat_slru_init_shmem_cb, .reset_all_cb = pgstat_slru_reset_all_cb, .snapshot_cb = pgstat_slru_snapshot_cb, @@ -474,8 +475,8 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .shared_data_len = sizeof(((PgStatShared_Wal *) 0)->stats), .init_backend_cb = pgstat_wal_init_backend_cb, - .flush_fixed_cb = pgstat_wal_flush_cb, - .have_fixed_pending_cb = pgstat_wal_have_pending_cb, + .flush_static_cb = pgstat_wal_flush_cb, + .have_static_pending_cb = pgstat_wal_have_pending_cb, .init_shmem_cb = pgstat_wal_init_shmem_cb, .reset_all_cb = pgstat_wal_reset_all_cb, .snapshot_cb = pgstat_wal_snapshot_cb, @@ -713,22 +714,17 @@ pgstat_report_stat(bool force) { bool do_flush = false; - /* Check for pending fixed-numbered stats */ + /* Check for pending stats */ for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) { const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); if (!kind_info) continue; - if (!kind_info->fixed_amount) - { - Assert(kind_info->have_fixed_pending_cb == NULL); - continue; - } - if (!kind_info->have_fixed_pending_cb) + if (!kind_info->have_static_pending_cb) continue; - if (kind_info->have_fixed_pending_cb()) + if (kind_info->have_static_pending_cb()) { do_flush = true; break; @@ -789,25 +785,20 @@ pgstat_report_stat(bool force) partial_flush = false; - /* flush of variable-numbered stats */ + /* flush of variable-numbered stats tracked in pending entries list */ partial_flush |= pgstat_flush_pending_entries(nowait); - /* flush of fixed-numbered stats */ + /* flush stats for each registered kind that has a flush static callback */ for (PgStat_Kind kind = PGSTAT_KIND_MIN; kind <= PGSTAT_KIND_MAX; kind++) { const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); if (!kind_info) continue; - if (!kind_info->fixed_amount) - { - Assert(kind_info->flush_fixed_cb == NULL); - continue; - } - if (!kind_info->flush_fixed_cb) + if (!kind_info->flush_static_cb) continue; - partial_flush |= kind_info->flush_fixed_cb(nowait); + partial_flush |= kind_info->flush_static_cb(nowait); } last_flush = now; diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c index 79e4d0a3053..bcae6a78169 100644 --- a/src/backend/utils/activity/pgstat_backend.c +++ b/src/backend/utils/activity/pgstat_backend.c @@ -11,7 +11,9 @@ * This statistics kind uses a proc number as object ID for the hash table * of pgstats. Entries are created each time a process is spawned, and are * dropped when the process exits. These are not written to the pgstats file - * on disk. + * on disk. Pending statistics are managed without direct interactions with + * the pgstats dshash, relying on PendingBackendStats instead so as it is + * possible to report data within critical sections. * * Copyright (c) 2001-2025, PostgreSQL Global Development Group * @@ -22,8 +24,49 @@ #include "postgres.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" #include "utils/pgstat_internal.h" +/* + * Backend statistics counts waiting to be flushed out. These counters may be + * reported within critical sections so we use static memory in order to avoid + * memory allocation. + */ +static PgStat_BackendPending PendingBackendStats; + +/* + * Utility routines to report I/O stats for backends, kept here to avoid + * exposing PendingBackendStats to the outside world. + */ +void +pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context, + IOOp io_op, instr_time io_time) +{ + Assert(track_io_timing); + + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return; + + Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); + + INSTR_TIME_ADD(PendingBackendStats.pending_io.pending_times[io_object][io_context][io_op], + io_time); +} + +void +pgstat_count_backend_io_op(IOObject io_object, IOContext io_context, + IOOp io_op, uint32 cnt, uint64 bytes) +{ + if (!pgstat_tracks_backend_bktype(MyBackendType)) + return; + + Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); + + PendingBackendStats.pending_io.counts[io_object][io_context][io_op] += cnt; + PendingBackendStats.pending_io.bytes[io_object][io_context][io_op] += bytes; +} + /* * Returns statistics of a backend by proc number. */ @@ -46,14 +89,21 @@ static void pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref) { PgStatShared_Backend *shbackendent; - PgStat_BackendPending *pendingent; PgStat_BktypeIO *bktype_shstats; - PgStat_PendingIO *pending_io; + PgStat_PendingIO pending_io; + + /* + * This function can be called even if nothing at all has happened for IO + * statistics. In this case, avoid unnecessarily modifying the stats + * entry. + */ + if (pg_memory_is_all_zeros(&PendingBackendStats.pending_io, + sizeof(struct PgStat_PendingIO))) + return; shbackendent = (PgStatShared_Backend *) entry_ref->shared_stats; - pendingent = (PgStat_BackendPending *) entry_ref->pending; bktype_shstats = &shbackendent->stats.io_stats; - pending_io = &pendingent->pending_io; + pending_io = PendingBackendStats.pending_io; for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++) { @@ -64,68 +114,74 @@ pgstat_flush_backend_entry_io(PgStat_EntryRef *entry_ref) instr_time time; bktype_shstats->counts[io_object][io_context][io_op] += - pending_io->counts[io_object][io_context][io_op]; + pending_io.counts[io_object][io_context][io_op]; bktype_shstats->bytes[io_object][io_context][io_op] += - pending_io->bytes[io_object][io_context][io_op]; - - time = pending_io->pending_times[io_object][io_context][io_op]; + pending_io.bytes[io_object][io_context][io_op]; + time = pending_io.pending_times[io_object][io_context][io_op]; bktype_shstats->times[io_object][io_context][io_op] += INSTR_TIME_GET_MICROSEC(time); } } } + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingBackendStats.pending_io, 0, sizeof(PgStat_PendingIO)); } /* - * Wrapper routine to flush backend statistics. + * Flush out locally pending backend statistics + * + * "flags" parameter controls which statistics to flush. Returns true + * if some statistics could not be flushed. */ -static bool -pgstat_flush_backend_entry(PgStat_EntryRef *entry_ref, bool nowait, - bits32 flags) +bool +pgstat_flush_backend(bool nowait, bits32 flags) { - if (!pgstat_tracks_backend_bktype(MyBackendType)) + PgStat_EntryRef *entry_ref; + + if (pg_memory_is_all_zeros(&PendingBackendStats, + sizeof(struct PgStat_BackendPending))) return false; - if (!pgstat_lock_entry(entry_ref, nowait)) + if (!pgstat_tracks_backend_bktype(MyBackendType)) return false; + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid, + MyProcNumber, nowait); + if (!entry_ref) + return true; + /* Flush requested statistics */ if (flags & PGSTAT_BACKEND_FLUSH_IO) pgstat_flush_backend_entry_io(entry_ref); pgstat_unlock_entry(entry_ref); - return true; + return false; } /* - * Callback to flush out locally pending backend statistics. - * - * If no stats have been recorded, this function returns false. + * Check if there are any backend stats waiting for flush. */ bool -pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +pgstat_backend_have_pending_cb(void) { - return pgstat_flush_backend_entry(entry_ref, nowait, PGSTAT_BACKEND_FLUSH_ALL); + return (!pg_memory_is_all_zeros(&PendingBackendStats, + sizeof(struct PgStat_BackendPending))); } /* - * Flush out locally pending backend statistics + * Callback to flush out locally pending backend statistics. * - * "flags" parameter controls which statistics to flush. + * If some stats could not be flushed, return true. */ -void -pgstat_flush_backend(bool nowait, bits32 flags) +bool +pgstat_backend_flush_cb(bool nowait) { - PgStat_EntryRef *entry_ref; - - if (!pgstat_tracks_backend_bktype(MyBackendType)) - return; - - entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_BACKEND, InvalidOid, - MyProcNumber, false, NULL); - (void) pgstat_flush_backend_entry(entry_ref, nowait, flags); + return pgstat_flush_backend(nowait, PGSTAT_BACKEND_FLUSH_ALL); } /* @@ -137,9 +193,8 @@ pgstat_create_backend(ProcNumber procnum) PgStat_EntryRef *entry_ref; PgStatShared_Backend *shstatent; - entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_BACKEND, InvalidOid, - procnum, NULL); - + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_BACKEND, InvalidOid, + MyProcNumber, false); shstatent = (PgStatShared_Backend *) entry_ref->shared_stats; /* @@ -147,20 +202,9 @@ pgstat_create_backend(ProcNumber procnum) * e.g. if we previously used this proc number. */ memset(&shstatent->stats, 0, sizeof(shstatent->stats)); -} - -/* - * Find or create a local PgStat_BackendPending entry for proc number. - */ -PgStat_BackendPending * -pgstat_prep_backend_pending(ProcNumber procnum) -{ - PgStat_EntryRef *entry_ref; - - entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_BACKEND, InvalidOid, - procnum, NULL); + pgstat_unlock_entry(entry_ref); - return entry_ref->pending; + MemSet(&PendingBackendStats, 0, sizeof(PgStat_BackendPending)); } /* diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c index 027aad8b24e..6ff5d9e96a1 100644 --- a/src/backend/utils/activity/pgstat_io.c +++ b/src/backend/utils/activity/pgstat_io.c @@ -73,18 +73,12 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op, Assert(pgstat_is_ioop_tracked_in_bytes(io_op) || bytes == 0); Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); - if (pgstat_tracks_backend_bktype(MyBackendType)) - { - PgStat_BackendPending *entry_ref; - - entry_ref = pgstat_prep_backend_pending(MyProcNumber); - entry_ref->pending_io.counts[io_object][io_context][io_op] += cnt; - entry_ref->pending_io.bytes[io_object][io_context][io_op] += bytes; - } - PendingIOStats.counts[io_object][io_context][io_op] += cnt; PendingIOStats.bytes[io_object][io_context][io_op] += bytes; + /* Add the per-backend counts */ + pgstat_count_backend_io_op(io_object, io_context, io_op, cnt, bytes); + have_iostats = true; } @@ -145,14 +139,9 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], io_time); - if (pgstat_tracks_backend_bktype(MyBackendType)) - { - PgStat_BackendPending *entry_ref; - - entry_ref = pgstat_prep_backend_pending(MyProcNumber); - INSTR_TIME_ADD(entry_ref->pending_io.pending_times[io_object][io_context][io_op], - io_time); - } + /* Add the per-backend count */ + pgstat_count_backend_io_op_time(io_object, io_context, io_op, + io_time); } pgstat_count_io_op(io_object, io_context, io_op, cnt, bytes); diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 09247ba0971..965a7fe2c64 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -264,7 +264,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, * VACUUM command has processed all tables and committed. */ pgstat_flush_io(false); - pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); + (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); } /* @@ -351,7 +351,7 @@ pgstat_report_analyze(Relation rel, /* see pgstat_report_vacuum() */ pgstat_flush_io(false); - pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); + (void) pgstat_flush_backend(false, PGSTAT_BACKEND_FLUSH_IO); } /* diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 2d40fe3e70f..d0d45150977 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -540,6 +540,15 @@ extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void); * Functions in pgstat_backend.c */ +/* used by pgstat_io.c for I/O stats tracked in backends */ +extern void pgstat_count_backend_io_op_time(IOObject io_object, + IOContext io_context, + IOOp io_op, + instr_time io_time); +extern void pgstat_count_backend_io_op(IOObject io_object, + IOContext io_context, + IOOp io_op, uint32 cnt, + uint64 bytes); extern PgStat_Backend *pgstat_fetch_stat_backend(ProcNumber procNumber); extern bool pgstat_tracks_backend_bktype(BackendType bktype); extern void pgstat_create_backend(ProcNumber procnum); diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 4bb8e5c53ab..c0e77488aed 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -156,8 +156,8 @@ typedef struct PgStat_EntryRef * Pending statistics data that will need to be flushed to shared memory * stats eventually. Each stats kind utilizing pending data defines what * format its pending data has and needs to provide a - * PgStat_KindInfo->flush_pending_cb callback to merge pending into shared - * stats. + * PgStat_KindInfo->flush_pending_cb callback to merge pending entries + * that are in dynamic memory into shared stats. */ void *pending; dlist_node pending_node; /* membership in pgStatPending list */ @@ -259,8 +259,9 @@ typedef struct PgStat_KindInfo void (*init_backend_cb) (void); /* - * For variable-numbered stats: flush pending stats. Required if pending - * data is used. See flush_fixed_cb for fixed-numbered stats. + * For variable-numbered stats: flush pending stats entries in dynamic + * memory within the dshash. Required if pending data interacts with the + * pgstats dshash. */ bool (*flush_pending_cb) (PgStat_EntryRef *sr, bool nowait); @@ -289,17 +290,19 @@ typedef struct PgStat_KindInfo void (*init_shmem_cb) (void *stats); /* - * For fixed-numbered statistics: Flush pending stats. Returns true if - * some of the stats could not be flushed, due to lock contention for - * example. Optional. + * For fixed-numbered or variable-numbered statistics: Check for pending + * stats in need of flush, when these do not use the pgstats dshash. + * Returns true if there are any stats pending for flush, triggering + * flush_cb. Optional. */ - bool (*flush_fixed_cb) (bool nowait); + bool (*have_static_pending_cb) (void); /* - * For fixed-numbered statistics: Check for pending stats in need of - * flush. Returns true if there are any stats pending for flush. Optional. + * For fixed-numbered or variable-numbered statistics: Flush pending + * static stats. Returns true if some of the stats could not be flushed, + * due to lock contention for example. Optional. */ - bool (*have_fixed_pending_cb) (void); + bool (*flush_static_cb) (bool nowait); /* * For fixed-numbered statistics: Reset All. @@ -617,10 +620,11 @@ extern void pgstat_archiver_snapshot_cb(void); #define PGSTAT_BACKEND_FLUSH_IO (1 << 0) /* Flush I/O statistics */ #define PGSTAT_BACKEND_FLUSH_ALL (PGSTAT_BACKEND_FLUSH_IO) -extern void pgstat_flush_backend(bool nowait, bits32 flags); -extern PgStat_BackendPending *pgstat_prep_backend_pending(ProcNumber procnum); -extern bool pgstat_backend_flush_cb(PgStat_EntryRef *entry_ref, bool nowait); -extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts); +extern bool pgstat_flush_backend(bool nowait, bits32 flags); +extern bool pgstat_backend_flush_cb(bool nowait); +extern bool pgstat_backend_have_pending_cb(void); +extern void pgstat_backend_reset_timestamp_cb(PgStatShared_Common *header, + TimestampTz ts); /* * Functions in pgstat_bgwriter.c -- 2.34.1