Hi,

Thanks for your comments and advice. I updated the patch.

On 2020-10-21 18:03, Kyotaro Horiguchi wrote:
At Tue, 20 Oct 2020 16:11:29 +0900, Masahiro Ikeda
<ikeda...@oss.nttdata.com> wrote in
On 2020-10-20 12:46, Amit Kapila wrote:
> I see that we also need to add extra code to capture these stats (some
> of which is in performance-critical path especially in
> XLogInsertRecord) which again makes me a bit uncomfortable. It might
> be that it is all fine as it is very important to collect these stats
> at cluster-level in spite that the same information can be gathered at
> statement-level to help customers but I don't see a very strong case
> for that in your proposal.

We should avoid that duplication as possible even if the both number
are important.

Also about performance, I thought there are few impacts because it
increments stats in memory. If I can implement to reuse pgWalUsage's
value which already collects these stats, there is no impact in
XLogInsertRecord.
For example, how about pg_stat_wal() calculates the accumulated
value of wal_records, wal_fpi, and wal_bytes to use pgWalUsage's
value?

I don't think that works, but it would work that pgstat_send_wal()
takes the difference of that values between two successive calls.

WalUsage prevWalUsage;
...
pgstat_send_wal()
{
..
   /* fill in some values using pgWalUsage */
WalStats.m_wal_bytes = pgWalUsage.wal_bytes - prevWalUsage.wal_bytes; WalStats.m_wal_records = pgWalUsage.wal_records - prevWalUsage.wal_records; WalStats.m_wal_wal_fpi = pgWalUsage.wal_fpi - prevWalUsage.wal_fpi;
...
   pgstat_send(&WalStats, sizeof(WalStats));

   /* remember the current numbers */
   prevWalUsage = pgWalUsage;

Thanks for Horiguchi-san's advice, I changed to reuse pgWalUsage
which is already defined and eliminates the extra overhead.

5. I notice some code in issue_xlog_fsync() function to collect sync info,
a standby may call the issue_xlog_fsync() too, which do not want to
to collect this info. I think this need some change avoid run by
standby side.

IIUC, issue_xlog_fsync is called by wal receiver on standby side.
But it doesn't send collected statistics to the stats collecter.
So, I think it's not necessary to change the code to avoid collecting the stats on the standby side.

Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 98e19954..587acd10 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3447,12 +3447,106 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
     </thead>
 
     <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_records</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of WAL records generated
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_fpi</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of WAL full page images generated
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_bytes</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total amount of WAL bytes generated
+      </para></entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>wal_buffers_full</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of times WAL data was written to the disk because WAL buffers got full
+       Total number of times WAL data written to the disk because WAL buffers got full
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_init_file</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of WAL file segment created
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_write_backend</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of times backends write WAL data to the disk
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_write_walwriter</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of times walwriter writes WAL data to the disk
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_write_time</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total amount of time that has been spent in the portion of
+       WAL data was written to disk by backend and walwriter, in milliseconds
+       (if <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_sync_backend</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of times backends sync WAL data to the disk
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_sync_walwriter</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total number of times walwriter syncs WAL data synced to the disk
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wal_sync_time</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Total amount of time that has been spent in the portion of
+       WAL data was synced to disk by backend and walwriter, in milliseconds
+       (if <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
       </para></entry>
      </row>
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 52a67b11..77fe977c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2527,6 +2527,8 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 			Size		nbytes;
 			Size		nleft;
 			int			written;
+			instr_time	start;
+			instr_time	duration;
 
 			/* OK to write the page(s) */
 			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
@@ -2535,9 +2537,28 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 			do
 			{
 				errno = 0;
+				if (track_io_timing)
+					INSTR_TIME_SET_CURRENT(start);
+
 				pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
 				written = pg_pwrite(openLogFile, from, nleft, startoffset);
 				pgstat_report_wait_end();
+
+				if (track_io_timing)
+				{
+					INSTR_TIME_SET_CURRENT(duration);
+					INSTR_TIME_SUBTRACT(duration, start);
+					WalStats.m_wal_write_time += INSTR_TIME_GET_MILLISEC(duration);
+				}
+
+				if (AmWalWriterProcess()){
+					WalStats.m_wal_write_walwriter++;
+				}
+				else
+				{
+					WalStats.m_wal_write_backend++;
+				}
+
 				if (written <= 0)
 				{
 					char		xlogfname[MAXFNAMELEN];
@@ -3418,6 +3439,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 				 errmsg("could not open file \"%s\": %m", path)));
 
 	elog(DEBUG2, "done creating and filling new WAL file");
+	WalStats.m_wal_init_file++;
 
 	return fd;
 }
@@ -10427,8 +10449,13 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 void
 issue_xlog_fsync(int fd, XLogSegNo segno)
 {
+	instr_time	start;
+	instr_time	duration;
 	char	   *msg = NULL;
 
+	if (track_io_timing)
+		INSTR_TIME_SET_CURRENT(start);
+
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
 	switch (sync_method)
 	{
@@ -10472,6 +10499,21 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
 	}
 
 	pgstat_report_wait_end();
+
+	if (track_io_timing)
+	{
+		INSTR_TIME_SET_CURRENT(duration);
+		INSTR_TIME_SUBTRACT(duration, start);
+		WalStats.m_wal_sync_time += INSTR_TIME_GET_MILLISEC(duration);
+	}
+
+	if (AmWalWriterProcess()){
+		WalStats.m_wal_sync_walwriter++;
+	}
+	else
+	{
+		WalStats.m_wal_sync_backend++;
+	}
 }
 
 /*
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5171ea05..64618e41 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -993,7 +993,17 @@ CREATE VIEW pg_stat_bgwriter AS
 
 CREATE VIEW pg_stat_wal AS
     SELECT
+        w.wal_records,
+        w.wal_fpi,
+        w.wal_bytes,
         w.wal_buffers_full,
+        w.wal_init_file,
+        w.wal_write_backend,
+        w.wal_write_walwriter,
+        w.wal_write_time,
+        w.wal_sync_backend,
+        w.wal_sync_walwriter,
+        w.wal_sync_time,
         w.stats_reset
     FROM pg_stat_get_wal() w;
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index f1dca2f2..1369ea54 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -41,6 +41,7 @@
 #include "catalog/pg_database.h"
 #include "catalog/pg_proc.h"
 #include "common/ip.h"
+#include "executor/instrument.h"
 #include "libpq/libpq.h"
 #include "libpq/pqsignal.h"
 #include "mb/pg_wchar.h"
@@ -142,6 +143,7 @@ char	   *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 PgStat_MsgWal WalStats;
+WalUsage prevWalUsage;
 
 /*
  * List of SLRU names that we keep stats for.  There is no central registry of
@@ -4574,6 +4576,11 @@ pgstat_send_bgwriter(void)
 void
 pgstat_send_wal(void)
 {
+	/* fill in some values using pgWalUsage */
+	WalStats.m_wal_bytes = pgWalUsage.wal_bytes - prevWalUsage.wal_bytes;
+	WalStats.m_wal_records = pgWalUsage.wal_records - prevWalUsage.wal_records;
+	WalStats.m_wal_fpi = pgWalUsage.wal_fpi - prevWalUsage.wal_fpi;
+
 	/* We assume this initializes to zeroes */
 	static const PgStat_MsgWal all_zeroes;
 
@@ -4591,6 +4598,9 @@ pgstat_send_wal(void)
 	pgstat_setheader(&WalStats.m_hdr, PGSTAT_MTYPE_WAL);
 	pgstat_send(&WalStats, sizeof(WalStats));
 
+	/* remember the current numbers */
+	prevWalUsage = pgWalUsage;
+
 	/*
 	 * Clear out the statistics buffer, so it can be re-used.
 	 */
@@ -6759,7 +6769,17 @@ pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
 static void
 pgstat_recv_wal(PgStat_MsgWal *msg, int len)
 {
+	walStats.wal_records += msg->m_wal_records;
+	walStats.wal_fpi += msg->m_wal_fpi;
+	walStats.wal_bytes += msg->m_wal_bytes;
 	walStats.wal_buffers_full += msg->m_wal_buffers_full;
+	walStats.wal_init_file += msg->m_wal_init_file;
+	walStats.wal_write_backend += msg->m_wal_write_backend;
+	walStats.wal_write_walwriter += msg->m_wal_write_walwriter;
+	walStats.wal_write_time += msg->m_wal_write_time;
+	walStats.wal_sync_backend += msg->m_wal_sync_backend;
+	walStats.wal_sync_walwriter += msg->m_wal_sync_walwriter;
+	walStats.wal_sync_time += msg->m_wal_sync_time;
 }
 
 /* ----------
diff --git a/src/backend/postmaster/walwriter.c b/src/backend/postmaster/walwriter.c
index a52832fe..ce9f4b7c 100644
--- a/src/backend/postmaster/walwriter.c
+++ b/src/backend/postmaster/walwriter.c
@@ -253,6 +253,9 @@ WalWriterMain(void)
 		else if (left_till_hibernate > 0)
 			left_till_hibernate--;
 
+		/* Send WAL statistics */
+		pgstat_send_wal();
+
 		/*
 		 * Sleep until we are signaled or WalWriterDelay has elapsed.  If we
 		 * haven't done anything useful for quite some time, lengthen the
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index a210fc93..7e800edb 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1703,7 +1703,7 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_wal(PG_FUNCTION_ARGS)
 {
-#define PG_STAT_GET_WAL_COLS	2
+#define PG_STAT_GET_WAL_COLS	12
 	TupleDesc	tupdesc;
 	Datum		values[PG_STAT_GET_WAL_COLS];
 	bool		nulls[PG_STAT_GET_WAL_COLS];
@@ -1715,9 +1715,29 @@ pg_stat_get_wal(PG_FUNCTION_ARGS)
 
 	/* Initialise attributes information in the tuple descriptor */
 	tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_WAL_COLS);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "wal_buffers_full",
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "wal_records",
 					   INT8OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "stats_reset",
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wal_fpi",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "wal_bytes",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "wal_buffers_full",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 5, "wal_init_file",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "wal_write_backend",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "wal_write_walwriter",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 8, "wal_write_time",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 9, "wal_sync_backend",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 10, "wal_sync_walwriter",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 11, "wal_sync_time",
+					   INT8OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 12, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
 	BlessTupleDesc(tupdesc);
@@ -1726,8 +1746,18 @@ pg_stat_get_wal(PG_FUNCTION_ARGS)
 	wal_stats = pgstat_fetch_stat_wal();
 
 	/* Fill values and NULLs */
-	values[0] = Int64GetDatum(wal_stats->wal_buffers_full);
-	values[1] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp);
+	values[0] = Int64GetDatum(wal_stats->wal_records);
+	values[1] = Int64GetDatum(wal_stats->wal_fpi);
+	values[2] = Int64GetDatum(wal_stats->wal_bytes);
+	values[3] = Int64GetDatum(wal_stats->wal_buffers_full);
+	values[4] = Int64GetDatum(wal_stats->wal_init_file);
+	values[5] = Int64GetDatum(wal_stats->wal_write_backend);
+	values[6] = Int64GetDatum(wal_stats->wal_write_walwriter);
+	values[7] = Int64GetDatum(wal_stats->wal_write_time);
+	values[8] = Int64GetDatum(wal_stats->wal_sync_backend);
+	values[9] = Int64GetDatum(wal_stats->wal_sync_walwriter);
+	values[10] = Int64GetDatum(wal_stats->wal_sync_time);
+	values[11] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp);
 
 	/* Returns the record as Datum */
 	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d9770bba..e65c5a8e 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5491,9 +5491,9 @@
 { oid => '1136', descr => 'statistics: information about WAL activity',
   proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{int8,timestamptz}',
-  proargmodes => '{o,o}',
-  proargnames => '{wal_buffers_full,stats_reset}',
+  proallargtypes => '{int8,int8,int8,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_init_file,wal_write_backend,wal_write_walwriter,wal_write_time,wal_sync_backend,wal_sync_walwriter,wal_sync_time,stats_reset}',
   prosrc => 'pg_stat_get_wal' },
 
 { oid => '2306', descr => 'statistics: information about SLRU caches',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 257e515b..c32f3a25 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -459,7 +459,17 @@ typedef struct PgStat_MsgBgWriter
 typedef struct PgStat_MsgWal
 {
 	PgStat_MsgHdr m_hdr;
+	PgStat_Counter m_wal_records;
+	PgStat_Counter m_wal_fpi;
+	PgStat_Counter m_wal_bytes;
 	PgStat_Counter m_wal_buffers_full;
+	PgStat_Counter m_wal_init_file;
+	PgStat_Counter m_wal_write_backend;
+	PgStat_Counter m_wal_write_walwriter;
+	PgStat_Counter m_wal_write_time;	/* accumulate times in milliseconds */
+	PgStat_Counter m_wal_sync_backend;
+	PgStat_Counter m_wal_sync_walwriter;
+	PgStat_Counter m_wal_sync_time; /* accumulate times in milliseconds */
 } PgStat_MsgWal;
 
 /* ----------
@@ -798,7 +808,18 @@ typedef struct PgStat_GlobalStats
  */
 typedef struct PgStat_WalStats
 {
+	PgStat_Counter wal_records;
+	PgStat_Counter wal_fpi;
+	PgStat_Counter wal_bytes;
 	PgStat_Counter wal_buffers_full;
+	PgStat_Counter wal_init_file;
+	PgStat_Counter wal_write_backend;
+	PgStat_Counter wal_write_walwriter;
+	PgStat_Counter wal_write_time;
+	PgStat_Counter wal_sync_backend;
+	PgStat_Counter wal_sync_walwriter;
+	PgStat_Counter wal_sync_time;
+
 	TimestampTz stat_reset_timestamp;
 } PgStat_WalStats;
 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 097ff5d1..6ecc3601 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2138,9 +2138,19 @@ pg_stat_user_tables| SELECT pg_stat_all_tables.relid,
     pg_stat_all_tables.autoanalyze_count
    FROM pg_stat_all_tables
   WHERE ((pg_stat_all_tables.schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (pg_stat_all_tables.schemaname !~ '^pg_toast'::text));
-pg_stat_wal| SELECT w.wal_buffers_full,
+pg_stat_wal| SELECT w.wal_records,
+    w.wal_fpi,
+    w.wal_bytes,
+    w.wal_buffers_full,
+    w.wal_init_file,
+    w.wal_write_backend,
+    w.wal_write_walwriter,
+    w.wal_write_time,
+    w.wal_sync_backend,
+    w.wal_sync_walwriter,
+    w.wal_sync_time,
     w.stats_reset
-   FROM pg_stat_get_wal() w(wal_buffers_full, stats_reset);
+   FROM pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, wal_init_file, wal_write_backend, wal_write_walwriter, wal_write_time, wal_sync_backend, wal_sync_walwriter, wal_sync_time, stats_reset);
 pg_stat_wal_receiver| SELECT s.pid,
     s.status,
     s.receive_start_lsn,

Reply via email to