Hi, > From 88740269660d00d548910c2f3aa631878c7cf0d4 Mon Sep 17 00:00:00 2001 > From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp> > Date: Thu, 21 Feb 2019 12:42:07 +0900 > Subject: [PATCH 4/6] Allow dsm to use on postmaster. > > DSM is inhibited to be used on postmaster. Shared memory baesd stats > collector needs it to work on postmaster and no problem found to do > that. Just allow it.
Maybe I'm missing something, but why? postmaster doesn't actually need to process stats messages in any way? > From 774b1495136db1ad6d174ab261487fdf6cb6a5ed Mon Sep 17 00:00:00 2001 > From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp> > Date: Thu, 21 Feb 2019 12:44:56 +0900 > Subject: [PATCH 5/6] Shared-memory based stats collector > > Previously activity statistics is shared via files on disk. Every > backend sends the numbers to the stats collector process via a socket. > It makes snapshots as a set of files on disk with a certain interval > then every backend reads them as necessary. It worked fine for > comparatively small set of statistics but the set is under the > pressure to growing up and the file size has reached the order of > megabytes. To deal with larger statistics set, this patch let backends > directly share the statistics via shared memory. Btw, you can make the life of a committer easier by collecting the reviewers and co-authors of a patch yourself... This desparately needs an introductory comment in pgstat.c or such explaining how the new scheme works. > +LWLock StatsMainLock; > +#define StatsLock (&StatsMainLock) Wait, what? You can't just define a lock this way. That's process local memory, locking that doesn't do anything useful. > +/* Shared stats bootstrap information */ > +typedef struct StatsShmemStruct { Please note that in PG's coding style the { comes in the next line. > +/* > + * Backends store various database-wide info that's waiting to be flushed > out > + * to shared memory in these variables. > + */ > +static int n_deadlocks = 0; > +static size_t n_tmpfiles = 0; > +static size_t n_tmpfilesize = 0; > + > +/* > + * have_recovery_conflicts represents the existence of any kind if conflict > + */ > +static bool have_recovery_conflicts = false; > +static int n_conflict_tablespace = 0; > +static int n_conflict_lock = 0; > +static int n_conflict_snapshot = 0; > +static int n_conflict_bufferpin = 0; > +static int n_conflict_startup_deadlock = 0; Probably worthwhile to group those into a struct, even just to make debugging easier. > > -/* ---------- > - * pgstat_init() - > - * > - * Called from postmaster at startup. Create the resources required > - * by the statistics collector process. If unable to do so, do not > - * fail --- better to let the postmaster start with stats collection > - * disabled. > - * ---------- > - */ > -void > -pgstat_init(void) > +static void > +pgstat_postmaster_shutdown(int code, Datum arg) You can't have a function like that without explaining why it's there. > + /* trash the stats on crash */ > + if (code == 0) > + pgstat_write_statsfiles(); > } And especially not without documenting what that code is supposed to mean. > pgstat_report_stat(bool force) > { > - /* we assume this inits to all zeroes: */ > - static const PgStat_TableCounts all_zeroes; > - static TimestampTz last_report = 0; > - > + static TimestampTz last_flush = 0; > + static TimestampTz pending_since = 0; > TimestampTz now; > - PgStat_MsgTabstat regular_msg; > - PgStat_MsgTabstat shared_msg; > - TabStatusArray *tsa; > - int i; > + pgstat_flush_stat_context cxt = {0}; > + bool have_other_stats = false; > + bool pending_stats = false; > + long elapsed; > + long secs; > + int usecs; > + > + /* Do we have anything to flush? */ > + if (have_recovery_conflicts || n_deadlocks != 0 || n_tmpfiles != 0) > + have_other_stats = true; > > /* Don't expend a clock check if nothing to do */ > if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) && > pgStatXactCommit == 0 && pgStatXactRollback == 0 && > - !have_function_stats) > - return; > + !have_other_stats && !have_function_stats) > + return 0; "other" seems like a pretty mysterious category. Seems better to either name precisely, or just use the underlying variables for checks. > +/* ------- > + * Subroutines for pgstat_flush_stat. > + * ------- > + */ > + > +/* > + * snapshot_statentry() - Find an entry from source dshash with cache. > + * Is snapshot_statentry() really a subroutine for pgstat_flush_stat()? > +static void * > +snapshot_statentry(pgstat_snapshot_cxt *cxt, Oid key) > +{ > + char *lentry = NULL; > + size_t keysize = cxt->dsh_params->key_size; > + size_t dsh_entrysize = cxt->dsh_params->entry_size; > + bool found; > + bool *negative; > + > + /* caches the result entry */ > > /* > - * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL > - * msec since we last sent one, or the caller wants to force stats out. > + * Create new hash with arbitrary initial entries since we don't know > how > + * this hash will grow. The boolean put at the end of the entry is > + * negative flag. > */ That, uh, seems pretty ugly and hard to understand. > +/* > + * pgstat_flush_stat: Flushes table stats out to shared statistics. > + * > + * If nowait is true, returns with false if required lock was not acquired s/with false/false/ > + * immediately. In the case, infos of some tables may be left alone in TSA > to TSA? I assume TabStatusArray, but I don't think that's a common or useful abbreviation. It'd be ok to just refer to the variable name. > +static bool > +pgstat_flush_stat(pgstat_flush_stat_context *cxt, bool nowait) > +{ > + /* try to apply the tab stats */ > + if (!pgstat_flush_tabstat(cxt, nowait, entry)) > { > - pgstat_send_tabstat(this_msg); > - this_msg->m_nentries = 0; > + /* > + * Failed. Leave it alone filling at the > beginning in TSA. > + */ > + TabStatHashEntry *hash_entry; > + bool found; > + > + if (new_tsa_hash == NULL) > + new_tsa_hash = create_tabstat_hash(); > + > + /* Create hash entry for this entry */ > + hash_entry = hash_search(new_tsa_hash, > &entry->t_id, > + > HASH_ENTER, &found); > + Assert(!found); > + > + /* > + * Move insertion pointer to the next segment. > There must be > + * enough space segments since we are just > leaving some of the > + * current elements. > + */ > + if (dest_elem >= TABSTAT_QUANTUM) > + { > + Assert(dest_tsa->tsa_next != NULL); > + dest_tsa = dest_tsa->tsa_next; > + dest_elem = 0; > + } > + > + /* Move the entry if needed */ > + if (tsa != dest_tsa || i != dest_elem) > + { > + PgStat_TableStatus *new_entry; > + new_entry = > &dest_tsa->tsa_entries[dest_elem]; > + *new_entry = *entry; > + entry = new_entry; > + } > + > + hash_entry->tsa_entry = entry; > + dest_elem++; This seems a lot of work for just leaving an entry around to be processed later. Shouldn't code for that already exist elsewhere? > void > pgstat_vacuum_stat(void) > { > - HTAB *htab; > - PgStat_MsgTabpurge msg; > - PgStat_MsgFuncpurge f_msg; > - HASH_SEQ_STATUS hstat; > + HTAB *oidtab; > + dshash_table *dshtable; > + dshash_seq_status dshstat; > PgStat_StatDBEntry *dbentry; > PgStat_StatTabEntry *tabentry; > PgStat_StatFuncEntry *funcentry; > - int len; > > - if (pgStatSock == PGINVALID_SOCKET) > + /* we don't collect statistics under standalone mode */ > + if (!IsUnderPostmaster) > return; > > - /* > - * If not done for this transaction, read the statistics collector stats > - * file into some hash tables. > - */ > - backend_read_statsfile(); > + /* If not done for this transaction, take a snapshot of stats */ > + pgstat_snapshot_global_stats(); Hm, why do we need a snapshot here? > /* > * Now repeat the above steps for functions. However, we needn't bother > * in the common case where no function stats are being collected. > */ Can't we move the act of iterating through these hashes and probing against another hash into a helper function and reuse? These duplications aren't pretty. Greetings, Andres Freund