Hi, thanks for the reviews. I updated the attached patch. The summary of the changes is following.
1. fix document I followed another view's comments. 2. refactor issue_xlog_fsync() I removed "sync_called" variables, narrowed the "duration" scope and change the switch statement to if statement. 3. make wal-receiver report WAL statistics I add the code to collect the statistics for a written operation in XLogWalRcvWrite() and to report stats in WalReceiverMain(). Since WalReceiverMain() can loop fast, to avoid loading stats collector, I add "force" argument to the pgstat_send_wal function. If "force" isfalse, it can skip reporting until at least 500 msec since it last reported. WalReceiverMain() almost calls pgstat_send_wal() with "force" as false.
Regards, -- Masahiro Ikeda NTT DATA CORPORATION
--- v5-0001-Add-statistics-related-to-write-sync-wal-records.patch 2021-01-23 09:26:01.919248712 +0900 +++ v6-0001-Add-statistics-related-to-write-sync-wal-records.patch 2021-01-25 16:27:50.749429666 +0900 @@ -1,6 +1,6 @@ -From ee1b7d17391b9d9619f709afeacdd118973471d6 Mon Sep 17 00:00:00 2001 +From e9aad92097c5cff5565b67ce1a8ec6d7b4c8a4d9 Mon Sep 17 00:00:00 2001 From: Masahiro Ikeda <ikeda...@oss.nttdata.com> -Date: Fri, 22 Jan 2021 21:38:31 +0900 +Date: Mon, 25 Jan 2021 16:26:04 +0900 Subject: [PATCH] Add statistics related to write/sync wal records. This patch adds following statistics to pg_stat_wal view @@ -24,20 +24,22 @@ (This requires a catversion bump, as well as an update to PGSTAT_FILE_FORMAT_ID) --- - doc/src/sgml/config.sgml | 21 +++++++ - doc/src/sgml/monitoring.sgml | 48 ++++++++++++++- - src/backend/access/transam/xlog.c | 59 ++++++++++++++++++- + doc/src/sgml/config.sgml | 21 ++++++++ + doc/src/sgml/monitoring.sgml | 48 ++++++++++++++++- + src/backend/access/transam/xlog.c | 51 ++++++++++++++++++- src/backend/catalog/system_views.sql | 4 ++ - src/backend/postmaster/pgstat.c | 4 ++ - src/backend/postmaster/walwriter.c | 3 + - src/backend/utils/adt/pgstatfuncs.c | 24 +++++++- - src/backend/utils/misc/guc.c | 9 +++ + src/backend/postmaster/checkpointer.c | 2 +- + src/backend/postmaster/pgstat.c | 26 ++++++++-- + src/backend/postmaster/walwriter.c | 3 ++ + src/backend/replication/walreceiver.c | 30 +++++++++++ + src/backend/utils/adt/pgstatfuncs.c | 24 +++++++-- + src/backend/utils/misc/guc.c | 9 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/access/xlog.h | 1 + src/include/catalog/pg_proc.dat | 14 ++--- - src/include/pgstat.h | 8 +++ - src/test/regress/expected/rules.out | 6 +- - 13 files changed, 189 insertions(+), 13 deletions(-) + src/include/pgstat.h | 10 +++- + src/test/regress/expected/rules.out | 6 ++- + 15 files changed, 232 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 82864bbb24..43f3fbcaf8 100644 @@ -72,7 +74,7 @@ <term><varname>track_functions</varname> (<type>enum</type>) <indexterm> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml -index f05140dd42..36764dc64f 100644 +index f05140dd42..5a8fc4eb0c 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3485,7 +3485,53 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i @@ -80,7 +82,7 @@ </para> <para> - Number of times WAL data was written to disk because WAL buffers became full -+ Total number of times WAL data was written to disk because WAL buffers became full ++ Total number of WAL data written to disk because WAL buffers became full + </para></entry> + </row> + @@ -89,7 +91,7 @@ + <structfield>wal_write</structfield> <type>bigint</type> + </para> + <para> -+ Total number of times WAL data was written to disk ++ Total number of WAL data written to disk + </para></entry> + </row> + @@ -109,7 +111,7 @@ + <structfield>wal_sync</structfield> <type>bigint</type> + </para> + <para> -+ Total number of times WAL data was synced to disk ++ Total number of WAL data synced to disk + (if <xref linkend="guc-wal-sync-method"/> is <literal>open_datasync</literal> or + <literal>open_sync</literal>, this value is zero because WAL data is synced + when to write it). @@ -131,7 +133,7 @@ </row> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c -index 470e113b33..b3890f97a2 100644 +index 470e113b33..1c4860bee7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -110,6 +110,7 @@ int CommitDelay = 0; /* precommit delay in microseconds */ @@ -142,16 +144,15 @@ #ifdef WAL_DEBUG bool XLOG_DEBUG = false; -@@ -2540,6 +2541,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) +@@ -2540,6 +2541,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) Size nbytes; Size nleft; int written; + instr_time start; -+ instr_time duration; /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; -@@ -2548,9 +2551,24 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) +@@ -2548,9 +2550,27 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) do { errno = 0; @@ -164,8 +165,11 @@ written = pg_pwrite(openLogFile, from, nleft, startoffset); pgstat_report_wait_end(); + ++ /* increment the i/o timing and the number of WAL data written */ + if (track_wal_io_timing) + { ++ instr_time duration; ++ + INSTR_TIME_SET_CURRENT(duration); + INSTR_TIME_SUBTRACT(duration, start); + WalStats.m_wal_write_time = INSTR_TIME_GET_MICROSEC(duration); @@ -176,57 +180,47 @@ if (written <= 0) { char xlogfname[MAXFNAMELEN]; -@@ -10565,7 +10583,33 @@ assign_xlog_sync_method(int new_sync_method, void *extra) +@@ -10565,7 +10585,12 @@ assign_xlog_sync_method(int new_sync_method, void *extra) void issue_xlog_fsync(int fd, XLogSegNo segno) { - char *msg = NULL; + char *msg = NULL; -+ bool sync_called = false; /* whether to sync data right now. */ + instr_time start; -+ instr_time duration; -+ -+ /* check whether to sync WAL data to the disk right now. */ -+ if (enableFsync) -+ { -+ switch (sync_method) -+ { -+ case SYNC_METHOD_FSYNC: -+#ifdef HAVE_FSYNC_WRITETHROUGH -+ case SYNC_METHOD_FSYNC_WRITETHROUGH: -+#endif -+#ifdef HAVE_FDATASYNC -+ case SYNC_METHOD_FDATASYNC: -+#endif -+ sync_called = true; -+ default: -+ /* other method synced data already or it's not unrecognized. */ -+ break; -+ } -+ } + + /* Measure i/o timing to sync WAL data.*/ -+ if (track_wal_io_timing && sync_called) ++ if (track_wal_io_timing) + INSTR_TIME_SET_CURRENT(start); pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC); switch (sync_method) -@@ -10610,6 +10654,19 @@ issue_xlog_fsync(int fd, XLogSegNo segno) +@@ -10610,6 +10635,30 @@ issue_xlog_fsync(int fd, XLogSegNo segno) } pgstat_report_wait_end(); + -+ if (sync_called) ++ /* ++ * check whether to sync WAL data to the disk right now because ++ * statistics must be incremented when syncing really occurred. ++ */ ++ if (enableFsync) + { -+ /* increment the i/o timing and the number of times to fsync WAL data */ -+ if (track_wal_io_timing) ++ if ((sync_method == SYNC_METHOD_FSYNC) || ++ (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH) || ++ (sync_method == SYNC_METHOD_FDATASYNC)) + { -+ INSTR_TIME_SET_CURRENT(duration); -+ INSTR_TIME_SUBTRACT(duration, start); -+ WalStats.m_wal_sync_time = INSTR_TIME_GET_MICROSEC(duration); -+ } ++ /* increment the i/o timing and the number of WAL data synced */ ++ if (track_wal_io_timing) ++ { ++ instr_time duration; ++ ++ INSTR_TIME_SET_CURRENT(duration); ++ INSTR_TIME_SUBTRACT(duration, start); ++ WalStats.m_wal_sync_time = INSTR_TIME_GET_MICROSEC(duration); ++ } + -+ WalStats.m_wal_sync++; ++ WalStats.m_wal_sync++; ++ } + } } @@ -246,11 +240,69 @@ w.stats_reset FROM pg_stat_get_wal() w; +diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c +index 54a818bf61..5d14a97e56 100644 +--- a/src/backend/postmaster/checkpointer.c ++++ b/src/backend/postmaster/checkpointer.c +@@ -505,7 +505,7 @@ CheckpointerMain(void) + pgstat_send_bgwriter(); + + /* Send WAL statistics to the stats collector. */ +- pgstat_send_wal(); ++ pgstat_send_wal(true); + + /* + * If any checkpoint flags have been set, redo the loop to handle the diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c -index f75b52719d..987bbd058d 100644 +index f75b52719d..256d8706ca 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c -@@ -6892,6 +6892,10 @@ pgstat_recv_wal(PgStat_MsgWal *msg, int len) +@@ -975,7 +975,7 @@ pgstat_report_stat(bool disconnect) + pgstat_send_funcstats(); + + /* Send WAL statistics */ +- pgstat_send_wal(); ++ pgstat_send_wal(true); + + /* Finally send SLRU statistics */ + pgstat_send_slru(); +@@ -4669,17 +4669,33 @@ pgstat_send_bgwriter(void) + /* ---------- + * pgstat_send_wal() - + * +- * Send WAL statistics to the collector ++ * Send WAL statistics to the collector. ++ * ++ * If force is false, don't send a message unless it's been at ++ * least PGSTAT_STAT_INTERVAL msec since we last sent one. + * ---------- + */ + void +-pgstat_send_wal(void) ++pgstat_send_wal(bool force) + { + /* We assume this initializes to zeroes */ + static const PgStat_MsgWal all_zeroes; ++ static TimestampTz last_report = 0; + ++ TimestampTz now; + WalUsage walusage; + ++ /* ++ * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL ++ * msec since we last sent one or specified "force". ++ */ ++ now = GetCurrentTimestamp(); ++ if (!force && ++ !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL)) ++ return; ++ ++ last_report = now; ++ + /* + * Calculate how much WAL usage counters are increased by substracting the + * previous counters from the current ones. Fill the results in WAL stats +@@ -6892,6 +6908,10 @@ pgstat_recv_wal(PgStat_MsgWal *msg, int len) walStats.wal_fpi += msg->m_wal_fpi; walStats.wal_bytes += msg->m_wal_bytes; walStats.wal_buffers_full += msg->m_wal_buffers_full; @@ -262,7 +314,7 @@ /* ---------- diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c -index 4f1a8e356b..104cba4581 100644 +index 4f1a8e356b..7fd56d1497 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -253,6 +253,9 @@ WalWriterMain(void) @@ -270,11 +322,68 @@ left_till_hibernate--; + /* Send WAL statistics */ -+ pgstat_send_wal(); ++ pgstat_send_wal(true); + /* * Sleep until we are signaled or WalWriterDelay has elapsed. If we * haven't done anything useful for quite some time, lengthen the +diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c +index 723f513d8b..901b194773 100644 +--- a/src/backend/replication/walreceiver.c ++++ b/src/backend/replication/walreceiver.c +@@ -485,7 +485,18 @@ WalReceiverMain(void) + + /* Check if we need to exit the streaming loop. */ + if (endofwal) ++ { ++ /* Send WAL statistics to the stats collector. */ ++ pgstat_send_wal(true); + break; ++ } ++ ++ /* ++ * Send WAL statistics to the stats collector. ++ * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL ++ * msec since we last sent one. ++ */ ++ pgstat_send_wal(false); + + /* + * Ideally we would reuse a WaitEventSet object repeatedly +@@ -874,6 +885,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) + while (nbytes > 0) + { + int segbytes; ++ instr_time start; + + if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) + { +@@ -931,7 +943,25 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) + /* OK to write the logs */ + errno = 0; + ++ ++ /* Measure i/o timing to write WAL data */ ++ if (track_wal_io_timing) ++ INSTR_TIME_SET_CURRENT(start); ++ + byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff); ++ ++ /* increment the i/o timing and the number of WAL data written */ ++ if (track_wal_io_timing) ++ { ++ instr_time duration; ++ ++ INSTR_TIME_SET_CURRENT(duration); ++ INSTR_TIME_SUBTRACT(duration, start); ++ WalStats.m_wal_write_time = INSTR_TIME_GET_MICROSEC(duration); ++ } ++ ++ WalStats.m_wal_write++; ++ + if (byteswritten <= 0) + { + char xlogfname[MAXFNAMELEN]; diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 62bff52638..7296ef04ff 100644 --- a/src/backend/utils/adt/pgstatfuncs.c @@ -394,7 +503,7 @@ { oid => '2306', descr => 'statistics: information about SLRU caches', proname => 'pg_stat_get_slru', prorows => '100', proisstrict => 'f', diff --git a/src/include/pgstat.h b/src/include/pgstat.h -index 724068cf87..e689d27480 100644 +index 724068cf87..8ef959c0cc 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -474,6 +474,10 @@ typedef struct PgStat_MsgWal @@ -419,6 +528,15 @@ TimestampTz stat_reset_timestamp; } PgStat_WalStats; +@@ -1590,7 +1598,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, + + extern void pgstat_send_archiver(const char *xlog, bool failed); + extern void pgstat_send_bgwriter(void); +-extern void pgstat_send_wal(void); ++extern void pgstat_send_wal(bool force); + + /* ---------- + * Support functions for the SQL-callable functions to diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6173473de9..bc3909fd17 100644 --- a/src/test/regress/expected/rules.out
From e9aad92097c5cff5565b67ce1a8ec6d7b4c8a4d9 Mon Sep 17 00:00:00 2001 From: Masahiro Ikeda <ikeda...@oss.nttdata.com> Date: Mon, 25 Jan 2021 16:26:04 +0900 Subject: [PATCH] Add statistics related to write/sync wal records. This patch adds following statistics to pg_stat_wal view to track WAL I/O activity. - the total number of writing/syncing WAL data. - the total amount of time that has been spent in writing/syncing WAL data. Since to track I/O timing may leads significant overhead, GUC parameter "track_wal_io_timing" is introduced. Only if this is on, the I/O timing is measured. The statistics related to sync are zero when "wal_sync_method" is "open_datasync" or "open_sync", because it leads syncing WAL data at same time when to write it. Author: Masahiro Ikeda Reviewed-By: Japin Li, Hayato Kuroda, Masahiko Sawada Discussion: https://postgr.es/m/0509ad67b585a5b86a83d445dfa75...@oss.nttdata.com (This requires a catversion bump, as well as an update to PGSTAT_FILE_FORMAT_ID) --- doc/src/sgml/config.sgml | 21 ++++++++ doc/src/sgml/monitoring.sgml | 48 ++++++++++++++++- src/backend/access/transam/xlog.c | 51 ++++++++++++++++++- src/backend/catalog/system_views.sql | 4 ++ src/backend/postmaster/checkpointer.c | 2 +- src/backend/postmaster/pgstat.c | 26 ++++++++-- src/backend/postmaster/walwriter.c | 3 ++ src/backend/replication/walreceiver.c | 30 +++++++++++ src/backend/utils/adt/pgstatfuncs.c | 24 +++++++-- src/backend/utils/misc/guc.c | 9 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/access/xlog.h | 1 + src/include/catalog/pg_proc.dat | 14 ++--- src/include/pgstat.h | 10 +++- src/test/regress/expected/rules.out | 6 ++- 15 files changed, 232 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 82864bbb24..43f3fbcaf8 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7416,6 +7416,27 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-track-wal-io-timing" xreflabel="track_wal_io_timing"> + <term><varname>track_wal_io_timing</varname> (<type>boolean</type>) + <indexterm> + <primary><varname>track_wal_io_timing</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Enables timing of WAL I/O calls. This parameter is off by default, + because it will repeatedly query the operating system for + the current time, which may cause significant overhead on some + platforms. You can use the <xref linkend="pgtesttiming"/> tool to + measure the overhead of timing on your system. + I/O timing information is + displayed in <link linkend="monitoring-pg-stat-wal-view"> + <structname>pg_stat_wal</structname></link>. Only superusers can + change this setting. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-track-functions" xreflabel="track_functions"> <term><varname>track_functions</varname> (<type>enum</type>) <indexterm> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index f05140dd42..5a8fc4eb0c 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -3485,7 +3485,53 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i <structfield>wal_buffers_full</structfield> <type>bigint</type> </para> <para> - Number of times WAL data was written to disk because WAL buffers became full + Total number of WAL data written to disk because WAL buffers became full + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>wal_write</structfield> <type>bigint</type> + </para> + <para> + Total number of WAL data written to disk + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>wal_write_time</structfield> <type>double precision</type> + </para> + <para> + Total amount of time that has been spent in the portion of + WAL data was written to disk, in milliseconds + (if <xref linkend="guc-track-wal-io-timing"/> is enabled, otherwise zero). + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>wal_sync</structfield> <type>bigint</type> + </para> + <para> + Total number of WAL data synced to disk + (if <xref linkend="guc-wal-sync-method"/> is <literal>open_datasync</literal> or + <literal>open_sync</literal>, this value is zero because WAL data is synced + when to write it). + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>wal_sync_time</structfield> <type>double precision</type> + </para> + <para> + Total amount of time that has been spent in the portion of + WAL data was synced to disk, in milliseconds + (if <xref linkend="guc-track-wal-io-timing"/> is enabled, otherwise zero. + if <xref linkend="guc-wal-sync-method"/> is <literal>open_datasync</literal> or + <literal>open_sync</literal>, this value is zero too because WAL data is synced + when to write it). </para></entry> </row> diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 470e113b33..1c4860bee7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -110,6 +110,7 @@ int CommitDelay = 0; /* precommit delay in microseconds */ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ int wal_retrieve_retry_interval = 5000; int max_slot_wal_keep_size_mb = -1; +bool track_wal_io_timing = false; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -2540,6 +2541,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) Size nbytes; Size nleft; int written; + instr_time start; /* OK to write the page(s) */ from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ; @@ -2548,9 +2550,27 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) do { errno = 0; + + /* Measure i/o timing to write WAL data */ + if (track_wal_io_timing) + INSTR_TIME_SET_CURRENT(start); + pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); written = pg_pwrite(openLogFile, from, nleft, startoffset); pgstat_report_wait_end(); + + /* increment the i/o timing and the number of WAL data written */ + if (track_wal_io_timing) + { + instr_time duration; + + INSTR_TIME_SET_CURRENT(duration); + INSTR_TIME_SUBTRACT(duration, start); + WalStats.m_wal_write_time = INSTR_TIME_GET_MICROSEC(duration); + } + + WalStats.m_wal_write++; + if (written <= 0) { char xlogfname[MAXFNAMELEN]; @@ -10565,7 +10585,12 @@ assign_xlog_sync_method(int new_sync_method, void *extra) void issue_xlog_fsync(int fd, XLogSegNo segno) { - char *msg = NULL; + char *msg = NULL; + instr_time start; + + /* Measure i/o timing to sync WAL data.*/ + if (track_wal_io_timing) + INSTR_TIME_SET_CURRENT(start); pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC); switch (sync_method) @@ -10610,6 +10635,30 @@ issue_xlog_fsync(int fd, XLogSegNo segno) } pgstat_report_wait_end(); + + /* + * check whether to sync WAL data to the disk right now because + * statistics must be incremented when syncing really occurred. + */ + if (enableFsync) + { + if ((sync_method == SYNC_METHOD_FSYNC) || + (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH) || + (sync_method == SYNC_METHOD_FDATASYNC)) + { + /* increment the i/o timing and the number of WAL data synced */ + if (track_wal_io_timing) + { + instr_time duration; + + INSTR_TIME_SET_CURRENT(duration); + INSTR_TIME_SUBTRACT(duration, start); + WalStats.m_wal_sync_time = INSTR_TIME_GET_MICROSEC(duration); + } + + WalStats.m_wal_sync++; + } + } } /* diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index fa58afd9d7..b8ace4fc41 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1004,6 +1004,10 @@ CREATE VIEW pg_stat_wal AS w.wal_fpi, w.wal_bytes, w.wal_buffers_full, + w.wal_write, + w.wal_write_time, + w.wal_sync, + w.wal_sync_time, w.stats_reset FROM pg_stat_get_wal() w; diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index 54a818bf61..5d14a97e56 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -505,7 +505,7 @@ CheckpointerMain(void) pgstat_send_bgwriter(); /* Send WAL statistics to the stats collector. */ - pgstat_send_wal(); + pgstat_send_wal(true); /* * If any checkpoint flags have been set, redo the loop to handle the diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index f75b52719d..256d8706ca 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -975,7 +975,7 @@ pgstat_report_stat(bool disconnect) pgstat_send_funcstats(); /* Send WAL statistics */ - pgstat_send_wal(); + pgstat_send_wal(true); /* Finally send SLRU statistics */ pgstat_send_slru(); @@ -4669,17 +4669,33 @@ pgstat_send_bgwriter(void) /* ---------- * pgstat_send_wal() - * - * Send WAL statistics to the collector + * Send WAL statistics to the collector. + * + * If force is false, don't send a message unless it's been at + * least PGSTAT_STAT_INTERVAL msec since we last sent one. * ---------- */ void -pgstat_send_wal(void) +pgstat_send_wal(bool force) { /* We assume this initializes to zeroes */ static const PgStat_MsgWal all_zeroes; + static TimestampTz last_report = 0; + TimestampTz now; WalUsage walusage; + /* + * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL + * msec since we last sent one or specified "force". + */ + now = GetCurrentTimestamp(); + if (!force && + !TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL)) + return; + + last_report = now; + /* * Calculate how much WAL usage counters are increased by substracting the * previous counters from the current ones. Fill the results in WAL stats @@ -6892,6 +6908,10 @@ pgstat_recv_wal(PgStat_MsgWal *msg, int len) walStats.wal_fpi += msg->m_wal_fpi; walStats.wal_bytes += msg->m_wal_bytes; walStats.wal_buffers_full += msg->m_wal_buffers_full; + walStats.wal_write += msg->m_wal_write; + walStats.wal_write_time += msg->m_wal_write_time; + walStats.wal_sync += msg->m_wal_sync; + walStats.wal_sync_time += msg->m_wal_sync_time; } /* ---------- diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c index 4f1a8e356b..7fd56d1497 100644 --- a/src/backend/postmaster/walwriter.c +++ b/src/backend/postmaster/walwriter.c @@ -253,6 +253,9 @@ WalWriterMain(void) else if (left_till_hibernate > 0) left_till_hibernate--; + /* Send WAL statistics */ + pgstat_send_wal(true); + /* * Sleep until we are signaled or WalWriterDelay has elapsed. If we * haven't done anything useful for quite some time, lengthen the diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 723f513d8b..901b194773 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -485,7 +485,18 @@ WalReceiverMain(void) /* Check if we need to exit the streaming loop. */ if (endofwal) + { + /* Send WAL statistics to the stats collector. */ + pgstat_send_wal(true); break; + } + + /* + * Send WAL statistics to the stats collector. + * Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL + * msec since we last sent one. + */ + pgstat_send_wal(false); /* * Ideally we would reuse a WaitEventSet object repeatedly @@ -874,6 +885,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) while (nbytes > 0) { int segbytes; + instr_time start; if (recvFile < 0 || !XLByteInSeg(recptr, recvSegNo, wal_segment_size)) { @@ -931,7 +943,25 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) /* OK to write the logs */ errno = 0; + + /* Measure i/o timing to write WAL data */ + if (track_wal_io_timing) + INSTR_TIME_SET_CURRENT(start); + byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff); + + /* increment the i/o timing and the number of WAL data written */ + if (track_wal_io_timing) + { + instr_time duration; + + INSTR_TIME_SET_CURRENT(duration); + INSTR_TIME_SUBTRACT(duration, start); + WalStats.m_wal_write_time = INSTR_TIME_GET_MICROSEC(duration); + } + + WalStats.m_wal_write++; + if (byteswritten <= 0) { char xlogfname[MAXFNAMELEN]; diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 62bff52638..7296ef04ff 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1799,7 +1799,7 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS) Datum pg_stat_get_wal(PG_FUNCTION_ARGS) { -#define PG_STAT_GET_WAL_COLS 5 +#define PG_STAT_GET_WAL_COLS 9 TupleDesc tupdesc; Datum values[PG_STAT_GET_WAL_COLS]; bool nulls[PG_STAT_GET_WAL_COLS]; @@ -1820,7 +1820,15 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) NUMERICOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "wal_buffers_full", INT8OID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 5, "stats_reset", + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "wal_write", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "wal_write_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 7, "wal_sync", + INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 8, "wal_sync_time", + FLOAT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 9, "stats_reset", TIMESTAMPTZOID, -1, 0); BlessTupleDesc(tupdesc); @@ -1840,7 +1848,17 @@ pg_stat_get_wal(PG_FUNCTION_ARGS) Int32GetDatum(-1)); values[3] = Int64GetDatum(wal_stats->wal_buffers_full); - values[4] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); + values[4] = Int64GetDatum(wal_stats->wal_write); + + /* convert counter from microsec to millisec for display */ + values[5] = Float8GetDatum((double) wal_stats->wal_write_time / 1000.0); + + values[6] = Int64GetDatum(wal_stats->wal_sync); + + /* convert counter from microsec to millisec for display */ + values[7] = Float8GetDatum((double) wal_stats->wal_sync_time / 1000.0); + + values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp); /* Returns the record as Datum */ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 17579eeaca..ac6f0cd4ef 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1485,6 +1485,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"track_wal_io_timing", PGC_SUSET, STATS_COLLECTOR, + gettext_noop("Collects timing statistics for WAL I/O activity."), + NULL + }, + &track_wal_io_timing, + false, + NULL, NULL, NULL + }, { {"update_process_title", PGC_SUSET, PROCESS_TITLE, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 8930a94fff..4dc79cf822 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -585,6 +585,7 @@ #track_activities = on #track_counts = on #track_io_timing = off +#track_wal_io_timing = off #track_functions = none # none, pl, all #track_activity_query_size = 1024 # (change requires restart) #stats_temp_directory = 'pg_stat_tmp' diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 75ec1073bd..1e53d9d4ca 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -131,6 +131,7 @@ extern int recovery_min_apply_delay; extern char *PrimaryConnInfo; extern char *PrimarySlotName; extern bool wal_receiver_create_temp_slot; +extern bool track_wal_io_timing; /* indirectly set via GUC system */ extern TransactionId recoveryTargetXid; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index b5f52d4e4a..9fe8a72105 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5531,13 +5531,13 @@ proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' }, -{ oid => '1136', descr => 'statistics: information about WAL activity', - proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', - proparallel => 'r', prorettype => 'record', proargtypes => '', - proallargtypes => '{int8,int8,numeric,int8,timestamptz}', - proargmodes => '{o,o,o,o,o}', - proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,stats_reset}', - prosrc => 'pg_stat_get_wal' }, + { oid => '1136', descr => 'statistics: information about WAL activity', + proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's', + proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{int8,int8,numeric,int8,int8,float8,int8,float8,timestamptz}', + proargmodes => '{o,o,o,o,o,o,o,o,o}', + proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_write_time,wal_sync,wal_sync_time,stats_reset}', + prosrc => 'pg_stat_get_wal' }, { oid => '2306', descr => 'statistics: information about SLRU caches', proname => 'pg_stat_get_slru', prorows => '100', proisstrict => 'f', diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 724068cf87..8ef959c0cc 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -474,6 +474,10 @@ typedef struct PgStat_MsgWal PgStat_Counter m_wal_fpi; uint64 m_wal_bytes; PgStat_Counter m_wal_buffers_full; + PgStat_Counter m_wal_write; + PgStat_Counter m_wal_write_time; /* time spend writing wal records in micro seconds */ + PgStat_Counter m_wal_sync; + PgStat_Counter m_wal_sync_time; /* time spend syncing wal records in micro seconds */ } PgStat_MsgWal; /* ---------- @@ -839,6 +843,10 @@ typedef struct PgStat_WalStats PgStat_Counter wal_fpi; uint64 wal_bytes; PgStat_Counter wal_buffers_full; + PgStat_Counter wal_write; + PgStat_Counter wal_write_time; + PgStat_Counter wal_sync; + PgStat_Counter wal_sync_time; TimestampTz stat_reset_timestamp; } PgStat_WalStats; @@ -1590,7 +1598,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, extern void pgstat_send_archiver(const char *xlog, bool failed); extern void pgstat_send_bgwriter(void); -extern void pgstat_send_wal(void); +extern void pgstat_send_wal(bool force); /* ---------- * Support functions for the SQL-callable functions to diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6173473de9..bc3909fd17 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2158,8 +2158,12 @@ pg_stat_wal| SELECT w.wal_records, w.wal_fpi, w.wal_bytes, w.wal_buffers_full, + w.wal_write, + w.wal_write_time, + w.wal_sync, + w.wal_sync_time, w.stats_reset - FROM pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, stats_reset); + FROM pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, wal_write, wal_write_time, wal_sync, wal_sync_time, stats_reset); pg_stat_wal_receiver| SELECT s.pid, s.status, s.receive_start_lsn, -- 2.25.1