Hi, thanks for useful comments.
I agree to expose the number of WAL write caused by full of WAL
buffers.
It's helpful when tuning wal_buffers size. Haribabu separated that
number
into two fields in his patch; one is the number of WAL write by
backend,
and another is by background processes and workers. But I'm not sure
how useful such separation is. I'm ok with just one field for that
number.
I agree with you. I don't think we need to separate the numbers for
foreground processes and background ones. WAL buffer is a single
resource. So "Writes due to full WAL buffer are happening. We may be
able to boost performance by increasing wal_buffers" would be enough.
I made a patch to expose the number of WAL write caused by full of WAL
buffers.
I'm going to submit this patch to commitfests.
As Fujii-san and Tsunakawa-san said, it expose the total number
since I agreed that we don't need to separate the numbers for
foreground processes and background ones.
By the way, do we need to add another metrics related to WAL?
For example, is the total number of WAL writes to the buffers useful to
calculate the dirty WAL write ratio?
Is it enough as a first step?
Regards,
--
Masahiro Ikeda
NTT DATA CORPORATION
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 7dcddf478a..d49e539da3 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -424,6 +424,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
</entry>
</row>
+ <row>
+ <entry><structname>pg_stat_walwrites</structname><indexterm><primary>pg_stat_walwrites</primary></indexterm></entry>
+ <entry>One row only, showing statistics about the
+ WAL writing activity. See
+ <xref linkend="monitoring-pg-stat-walwrites-view"/> for details.
+ </entry>
+ </row>
+
<row>
<entry><structname>pg_stat_database</structname><indexterm><primary>pg_stat_database</primary></indexterm></entry>
<entry>One row per database, showing database-wide statistics. See
@@ -3244,6 +3252,48 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</sect2>
+ <sect2 id="monitoring-pg-stat-walwrites-view">
+ <title><structname>pg_stat_walwrites</structname></title>
+
+ <indexterm>
+ <primary>pg_stat_walwrites</primary>
+ </indexterm>
+
+ <para>
+ The <structname>pg_stat_walwrites</structname> view will always have a
+ single row, containing data about the WAL writing activity of the cluster.
+ </para>
+
+ <table id="pg-stat-walwrites-view" xreflabel="pg_stat_walwrites">
+ <title><structname>pg_stat_walwrites</structname> View</title>
+ <tgroup cols="1">
+ <thead>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ Column Type
+ </para>
+ <para>
+ Description
+ </para></entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>dirty_writes</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of dirty WAL writes that are carried out by background processes and workers
+ when the <xref linkend="guc-wal-buffers"/> are full.
+ </para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+</sect2>
+
<sect2 id="monitoring-pg-stat-database-view">
<title><structname>pg_stat_database</structname></title>
@@ -4632,8 +4682,9 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
argument. The argument can be <literal>bgwriter</literal> to reset
all the counters shown in
the <structname>pg_stat_bgwriter</structname>
- view,or <literal>archiver</literal> to reset all the counters shown in
- the <structname>pg_stat_archiver</structname> view.
+ view, <literal>archiver</literal> to reset all the counters shown in
+ the <structname>pg_stat_archiver</structname> view ,or <literal>walwrites</literal>
+ to reset all the counters shown in the <structname>pg_stat_walwrites</structname> view.
</para>
<para>
This function is restricted to superusers by default, but other users
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 756b838e6a..66abd200d1 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2193,6 +2193,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
WriteRqst.Write = OldPageRqstPtr;
WriteRqst.Flush = 0;
XLogWrite(WriteRqst, false);
+ WALWriteStats->dirty_writes++;
LWLockRelease(WALWriteLock);
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 8625cbeab6..cfc6a13b6a 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -994,6 +994,9 @@ CREATE VIEW pg_stat_progress_analyze AS
FROM pg_stat_get_progress_info('ANALYZE') AS S
LEFT JOIN pg_database D ON S.datid = D.oid;
+CREATE VIEW pg_stat_walwrites AS
+ SELECT * FROM pg_stat_get_walwrites() AS A;
+
CREATE VIEW pg_stat_progress_vacuum AS
SELECT
S.pid AS pid, S.datid AS datid, D.datname AS datname,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 15f92b66c6..17feecd1a5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -167,6 +167,15 @@ static const char *const slru_names[] = {
*/
static PgStat_MsgSLRU SLRUStats[SLRU_NUM_ELEMENTS];
+/*
+ * WalWrites Local statistics counters.
+ * The statistics data gets populated in XLogWrite function.
+ * Stored directly in a stats message structure so it can be sent
+ * to stats collector process without needing to copy things around.
+ * We assume this inits to zeroes.
+ */
+PgStat_WalWritesStats *WALWriteStats;
+
/* ----------
* Local data
* ----------
@@ -657,6 +666,29 @@ startup_failed:
SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
}
+/*
+ * Initialization of shared memory for WALWritesStats
+ */
+Size
+WALWritesShmemSize(void)
+{
+ return sizeof(PgStat_WalWritesStats);
+}
+
+void
+WALWritesShmemInit(void)
+{
+ bool foundWALWrites;
+
+ WALWriteStats = (PgStat_WalWritesStats *)
+ ShmemInitStruct("WAL WriteStats", WALWritesShmemSize(), &foundWALWrites);
+
+ if (!foundWALWrites)
+ {
+ MemSet(WALWriteStats, 0, sizeof(PgStat_WalWritesStats));
+ }
+}
+
/*
* subroutine for pgstat_reset_all
*/
@@ -1370,11 +1402,25 @@ pgstat_reset_shared_counters(const char *target)
msg.m_resettarget = RESET_ARCHIVER;
else if (strcmp(target, "bgwriter") == 0)
msg.m_resettarget = RESET_BGWRITER;
+ else if (strcmp(target, "walwrites") == 0)
+ {
+ /*
+ * Reset the wal writes statistics of the cluster. These statistics
+ * are not reset by the stats collector because these are resides in a
+ * shared memory, so it is not possible for the stats collector to
+ * reset them. FIXME: This may need a sepearate function entirely to
+ * reset the stats.
+ */
+ LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+ memset(WALWriteStats, 0, sizeof(PgStat_WalWritesStats));
+ WALWriteStats->stat_reset_timestamp = GetCurrentTimestamp();
+ LWLockRelease(WALWriteLock);
+ }
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized reset target: \"%s\"", target),
- errhint("Target must be \"archiver\" or \"bgwriter\".")));
+ errhint("Target must be \"archiver\" or \"bgwriter\" or \"walwrites\".")));
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
pgstat_send(&msg, sizeof(msg));
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 96c2aaabbd..a49760ee24 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -149,6 +149,7 @@ CreateSharedMemoryAndSemaphores(void)
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
+ size = add_size(size, WALWritesShmemSize());
#ifdef EXEC_BACKEND
size = add_size(size, ShmemBackendArraySize());
#endif
@@ -216,6 +217,7 @@ CreateSharedMemoryAndSemaphores(void)
* Set up xlog, clog, and buffers
*/
XLOGShmemInit();
+ WALWritesShmemInit();
CLOGShmemInit();
CommitTsShmemInit();
SUBTRANSShmemInit();
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 95738a4e34..12d762ef6a 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -2098,3 +2098,38 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
/* Returns the record as Datum */
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}
+
+Datum
+pg_stat_get_walwrites(PG_FUNCTION_ARGS)
+{
+ TupleDesc tupdesc;
+ TimestampTz result;
+#define NUM_PG_STAT_WALWRITE_COLS 2
+ Datum values[NUM_PG_STAT_WALWRITE_COLS];
+ bool nulls[NUM_PG_STAT_WALWRITE_COLS];
+
+ /* Initialize values and NULL flags arrays */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+
+ /* Get statistics about the archiver process */
+ /* Fill values and NULLs */
+ values[0] = Int64GetDatum(WALWriteStats->dirty_writes);
+
+ result = TimestampTzGetDatum(WALWriteStats->stat_reset_timestamp);
+ if (result == 0)
+ nulls[1] = true;
+ else
+ values[1] = result;
+
+ LWLockRelease(WALWriteLock);
+ /* Returns the record as Datum */
+ PG_RETURN_DATUM(HeapTupleGetDatum(
+ heap_form_tuple(tupdesc, values, nulls)));
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 082a11f270..f87bfdcfa9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5484,6 +5484,14 @@
proargnames => '{name,blks_zeroed,blks_hit,blks_read,blks_written,blks_exists,flushes,truncates,stats_reset}',
prosrc => 'pg_stat_get_slru' },
+{ oid => '8000', descr => 'statistics: information about WAL writes activity',
+ proname => 'pg_stat_get_walwrites', provolatile => 's', proparallel => 'r',
+ prorettype => 'record', proargtypes => '',
+ proallargtypes => '{int8,timestamptz}',
+ proargmodes => '{o,o}',
+ proargnames => '{dirty_writes,stats_reset}',
+ prosrc => 'pg_stat_get_walwrites' },
+
{ oid => '2978', descr => 'statistics: number of function calls',
proname => 'pg_stat_get_function_calls', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 1387201382..94e4da6fd7 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -745,6 +745,19 @@ typedef struct PgStat_GlobalStats
TimestampTz stat_reset_timestamp;
} PgStat_GlobalStats;
+/*
+ * Walwrites statistics kept in the stats collector
+ */
+typedef struct PgStat_WalWritesStats
+{
+ PgStat_Counter dirty_writes; /* number of WAL write caused by full of WAL buffers */
+ TimestampTz stat_reset_timestamp; /* last time when the stats reset */
+} PgStat_WalWritesStats;
+
+/* ----------
+ * Backend types
+ * ----------
+
/*
* SLRU statistics kept in the stats collector
*/
@@ -1260,6 +1273,11 @@ extern char *pgstat_stat_filename;
*/
extern PgStat_MsgBgWriter BgWriterStats;
+/*
+ * WAL writes statistics updated in XLogWrite function
+ */
+extern PgStat_WalWritesStats * WALWriteStats;
+
/*
* Updated by pgstat_count_buffer_*_time macros
*/
@@ -1278,6 +1296,9 @@ extern int pgstat_start(void);
extern void pgstat_reset_all(void);
extern void allow_immediate_pgstat_restart(void);
+extern Size WALWritesShmemSize(void);
+extern void WALWritesShmemInit(void);
+
#ifdef EXEC_BACKEND
extern void PgstatCollectorMain(int argc, char *argv[]) pg_attribute_noreturn();
#endif
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 601734a6f1..3457cf2904 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2136,6 +2136,9 @@ pg_stat_wal_receiver| SELECT s.pid,
s.conninfo
FROM pg_stat_get_wal_receiver() s(pid, status, receive_start_lsn, receive_start_tli, written_lsn, flushed_lsn, received_tli, last_msg_send_time, last_msg_receipt_time, latest_end_lsn, latest_end_time, slot_name, sender_host, sender_port, conninfo)
WHERE (s.pid IS NOT NULL);
+pg_stat_walwrites| SELECT a.dirty_writes,
+ a.stats_reset
+ FROM pg_stat_get_walwrites() a(dirty_writes, stats_reset);
pg_stat_xact_all_tables| SELECT c.oid AS relid,
n.nspname AS schemaname,
c.relname,
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 06c4c3e476..3c04c57023 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -67,6 +67,13 @@ select count(*) >= 0 as ok from pg_prepared_xacts;
t
(1 row)
+-- There will surely and maximum one record
+select count(*) = 1 as ok from pg_stat_walwrites;
+ ok
+----
+ t
+(1 row)
+
-- This is to record the prevailing planner enable_foo settings during
-- a regression test run.
select name, setting from pg_settings where name like 'enable%';
diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql
index 28e412b735..21f49c9a3b 100644
--- a/src/test/regress/sql/sysviews.sql
+++ b/src/test/regress/sql/sysviews.sql
@@ -32,6 +32,9 @@ select count(*) = 0 as ok from pg_prepared_statements;
-- See also prepared_xacts.sql
select count(*) >= 0 as ok from pg_prepared_xacts;
+-- There will surely and maximum one record
+select count(*) = 1 as ok from pg_stat_walwrites;
+
-- This is to record the prevailing planner enable_foo settings during
-- a regression test run.
select name, setting from pg_settings where name like 'enable%';