On Tue, Feb 25, 2025 at 02:20:48PM +0900, Michael Paquier wrote:
> With more tweaks applied to the docs, the attached is my result.

Resending once as there was a blip with the prefixes used with
format-patch, preventing git am to work by default.
--
Michael
From 196f84b8bd8f817b6e21e65de90464d32a27b286 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Date: Mon, 24 Feb 2025 11:29:35 +0000
Subject: [PATCH v2] re-introduce track_wal_io_timing

a051e71e28a added the "timing tracking" in the WAL code path with "only"
track_io_timing enabled (and track_wal_io_timing still false). Then it made
sense for 2421e9a51d2 to get rid of the track_wal_io_timing GUC.

Concerns have been raised about the fact that a051e71e28a added the "timing tracking"
in the WAL code path with "only" track_io_timing enabled. So this commit re-introduces
the track_wal_io_timing GUC and ensure that WAL timing is tracked in pg_stat_io
only if track_io_timing and track_wal_io_timing are both enabled.
---
 src/include/access/xlog.h                     |  1 +
 src/include/pgstat.h                          |  2 +-
 src/backend/access/transam/xlog.c             |  9 +++---
 src/backend/access/transam/xlogreader.c       |  2 +-
 src/backend/access/transam/xlogrecovery.c     |  2 +-
 src/backend/storage/buffer/bufmgr.c           | 10 +++---
 src/backend/storage/buffer/localbuf.c         |  4 +--
 src/backend/storage/smgr/md.c                 |  4 +--
 src/backend/utils/activity/pgstat_backend.c   |  3 +-
 src/backend/utils/activity/pgstat_io.c        |  9 +++---
 src/backend/utils/misc/guc_tables.c           |  9 ++++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 doc/src/sgml/config.sgml                      | 25 +++++++++++++++
 doc/src/sgml/monitoring.sgml                  | 31 ++++++++++++++++---
 doc/src/sgml/wal.sgml                         |  4 +--
 15 files changed, 88 insertions(+), 28 deletions(-)

diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 0a50d160bba8..d313099c027f 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -54,6 +54,7 @@ extern PGDLLIMPORT char *wal_consistency_checking_string;
 extern PGDLLIMPORT bool log_checkpoints;
 extern PGDLLIMPORT int CommitDelay;
 extern PGDLLIMPORT int CommitSiblings;
+extern PGDLLIMPORT bool track_wal_io_timing;
 extern PGDLLIMPORT int wal_decode_buffer_size;
 
 extern PGDLLIMPORT int CheckPointSegments;
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fc651d03cf97..e64c527265b9 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -564,7 +564,7 @@ extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 										 BackendType bktype);
 extern void pgstat_count_io_op(IOObject io_object, IOContext io_context,
 							   IOOp io_op, uint32 cnt, uint64 bytes);
-extern instr_time pgstat_prepare_io_time(void);
+extern instr_time pgstat_prepare_io_time(bool track_io_guc);
 extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context,
 									IOOp io_op, instr_time start_time,
 									uint32 cnt, uint64 bytes);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 919314f82587..799fc739e18c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -134,6 +134,7 @@ int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
 int			wal_retrieve_retry_interval = 5000;
 int			max_slot_wal_keep_size_mb = -1;
 int			wal_decode_buffer_size = 512 * 1024;
+bool		track_wal_io_timing = false;
 
 #ifdef WAL_DEBUG
 bool		XLOG_DEBUG = false;
@@ -2437,7 +2438,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 				/*
 				 * Measure I/O timing to write WAL data, for pg_stat_io.
 				 */
-				start = pgstat_prepare_io_time();
+				start = pgstat_prepare_io_time(track_wal_io_timing);
 
 				pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
 				written = pg_pwrite(openLogFile, from, nleft, startoffset);
@@ -3248,7 +3249,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 				 errmsg("could not create file \"%s\": %m", tmppath)));
 
 	/* Measure I/O timing when initializing segment */
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
 	save_errno = 0;
@@ -3310,7 +3311,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	}
 
 	/* Measure I/O timing when flushing segment */
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
 	if (pg_fsync(fd) != 0)
@@ -8744,7 +8745,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 	/*
 	 * Measure I/O timing to sync the WAL file for pg_stat_io.
 	 */
-	start = pgstat_prepare_io_time();
+	start = pgstat_prepare_io_time(track_wal_io_timing);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
 	switch (wal_sync_method)
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 995e4f7c26fd..5483d4f0dd2b 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1557,7 +1557,7 @@ WALRead(XLogReaderState *state,
 
 #ifndef FRONTEND
 		/* Measure I/O timing when reading segment */
-		io_start = pgstat_prepare_io_time();
+		io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 #endif
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 915cb330295c..c5475aaa97a1 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -3401,7 +3401,7 @@ retry:
 	readOff = targetPageOff;
 
 	/* Measure I/O timing when reading segment */
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 	r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index d3216ef6ba75..75cfc2b6fe96 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1509,7 +1509,7 @@ WaitReadBuffers(ReadBuffersOperation *operation)
 			io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
 		}
 
-		io_start = pgstat_prepare_io_time();
+		io_start = pgstat_prepare_io_time(track_io_timing);
 		smgrreadv(operation->smgr, forknum, io_first_block, io_pages, io_buffers_len);
 		pgstat_count_io_op_time(io_object, io_context, IOOP_READ, io_start,
 								1, io_buffers_len * BLCKSZ);
@@ -2401,7 +2401,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 		}
 	}
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/*
 	 * Note: if smgrzeroextend fails, we will end up with buffers that are
@@ -3858,7 +3858,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	 */
 	bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/*
 	 * bufToWrite is either the shared buffer or a copy, as appropriate.
@@ -4460,7 +4460,7 @@ FlushRelationBuffers(Relation rel)
 
 				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-				io_start = pgstat_prepare_io_time();
+				io_start = pgstat_prepare_io_time(track_io_timing);
 
 				smgrwrite(srel,
 						  BufTagGetForkNum(&bufHdr->tag),
@@ -5917,7 +5917,7 @@ IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
 	sort_pending_writebacks(wb_context->pending_writebacks,
 							wb_context->nr_pending);
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/*
 	 * Coalesce neighbouring writes, but nothing else. For that we iterate
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 78b65fb160ad..64931efaa756 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -244,7 +244,7 @@ GetLocalVictimBuffer(void)
 
 		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-		io_start = pgstat_prepare_io_time();
+		io_start = pgstat_prepare_io_time(track_io_timing);
 
 		/* And write... */
 		smgrwrite(oreln,
@@ -414,7 +414,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr,
 		}
 	}
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/* actually extend relation */
 	smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 298754d1b64a..7bf0b45e2c34 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1381,7 +1381,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 		ereport(DEBUG1,
 				(errmsg_internal("could not forward fsync request because request queue is full")));
 
-		io_start = pgstat_prepare_io_time();
+		io_start = pgstat_prepare_io_time(track_io_timing);
 
 		if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
 			ereport(data_sync_elevel(ERROR),
@@ -1786,7 +1786,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
 		need_to_close = true;
 	}
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/* Sync the file. */
 	result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
diff --git a/src/backend/utils/activity/pgstat_backend.c b/src/backend/utils/activity/pgstat_backend.c
index 4a667e7019cc..338da73a9a91 100644
--- a/src/backend/utils/activity/pgstat_backend.c
+++ b/src/backend/utils/activity/pgstat_backend.c
@@ -24,6 +24,7 @@
 
 #include "postgres.h"
 
+#include "access/xlog.h"
 #include "storage/bufmgr.h"
 #include "utils/memutils.h"
 #include "utils/pgstat_internal.h"
@@ -43,7 +44,7 @@ void
 pgstat_count_backend_io_op_time(IOObject io_object, IOContext io_context,
 								IOOp io_op, instr_time io_time)
 {
-	Assert(track_io_timing);
+	Assert(track_io_timing || track_wal_io_timing);
 
 	if (!pgstat_tracks_backend_bktype(MyBackendType))
 		return;
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index e7362b52a37b..edb489f3a864 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -83,14 +83,15 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op,
 }
 
 /*
- * Initialize the internal timing for an IO operation.
+ * Initialize the internal timing for an IO operation, depending on an
+ * IO timing GUC.
  */
 instr_time
-pgstat_prepare_io_time(void)
+pgstat_prepare_io_time(bool track_io_guc)
 {
 	instr_time	io_start;
 
-	if (track_io_timing)
+	if (track_io_guc)
 		INSTR_TIME_SET_CURRENT(io_start);
 	else
 	{
@@ -119,7 +120,7 @@ void
 pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
 						instr_time start_time, uint32 cnt, uint64 bytes)
 {
-	if (track_io_timing)
+	if (!INSTR_TIME_IS_ZERO(start_time))
 	{
 		instr_time	io_time;
 
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 690bf96ef030..ad25cbb39c55 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1501,6 +1501,15 @@ struct config_bool ConfigureNamesBool[] =
 		false,
 		NULL, NULL, NULL
 	},
+	{
+		{"track_wal_io_timing", PGC_SUSET, STATS_CUMULATIVE,
+			gettext_noop("Collects timing statistics for WAL I/O activity."),
+			NULL
+		},
+		&track_wal_io_timing,
+		false,
+		NULL, NULL, NULL
+	},
 
 	{
 		{"update_process_title", PGC_SUSET, PROCESS_TITLE,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e771d87da1fb..5362ff805195 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -641,6 +641,7 @@
 #track_counts = on
 #track_cost_delay_timing = off
 #track_io_timing = off
+#track_wal_io_timing = off
 #track_functions = none			# none, pl, all
 #stats_fetch_consistency = cache	# cache, none, snapshot
 
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index a83545761085..6186648ccaf5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8393,6 +8393,31 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-track-wal-io-timing" xreflabel="track_wal_io_timing">
+      <term><varname>track_wal_io_timing</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>track_wal_io_timing</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables timing of WAL I/O calls. This parameter is off by default,
+        as it will repeatedly query the operating system for the current time,
+        which may cause significant overhead on some platforms.
+        You can use the <application>pg_test_timing</application> tool to
+        measure the overhead of timing on your system.
+        I/O timing information is displayed in
+        <link linkend="monitoring-pg-stat-io-view">
+        <structname>pg_stat_io</structname></link> for the
+        <varname>object</varname> <literal>wal</literal> and in the output of
+        the <link linkend="pg-stat-get-backend-io">
+        <function>pg_stat_get_backend_io()</function></link> function.
+        Only superusers and users with the appropriate <literal>SET</literal>
+        privilege can change this setting.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-track-functions" xreflabel="track_functions">
       <term><varname>track_functions</varname> (<type>enum</type>)
       <indexterm>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 3dfd059b7ee8..9178f1d34efd 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -183,6 +183,11 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
    of block read, write, extend, and fsync times.
   </para>
 
+  <para>
+   The parameter <xref linkend="guc-track-wal-io-timing"/> enables monitoring
+   of WAL read, write and fsync times.
+  </para>
+
   <para>
    Normally these parameters are set in <filename>postgresql.conf</filename> so
    that they apply to all server processes, but it is possible to turn
@@ -2723,7 +2728,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
        </para>
        <para>
         Time spent in read operations in milliseconds (if
-        <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+        <xref linkend="guc-track-io-timing"/> is enabled and
+        <varname>object</varname> is not <literal>wal</literal>,
+        or if <xref linkend="guc-track-wal-io-timing"/> is enabled
+        and <varname>object</varname> is <literal>wal</literal>,
+        otherwise zero)
        </para>
       </entry>
      </row>
@@ -2757,7 +2766,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
        </para>
        <para>
         Time spent in write operations in milliseconds (if
-        <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+        <xref linkend="guc-track-io-timing"/> is enabled and
+        <varname>object</varname> is not <literal>wal</literal>,
+        or if <xref linkend="guc-track-wal-io-timing"/> is enabled
+        and <varname>object</varname> is <literal>wal</literal>,
+        otherwise zero)
        </para>
       </entry>
      </row>
@@ -2816,8 +2829,12 @@ description | Waiting for a newly initialized WAL file to reach durable storage
         <structfield>extend_time</structfield> <type>double precision</type>
        </para>
        <para>
-        Time spent in extend operations in milliseconds (if
-        <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+        Time spent in extend operations in milliseconds. (if
+        <xref linkend="guc-track-io-timing"/> is enabled and
+        <varname>object</varname> is not <literal>wal</literal>,
+        or if <xref linkend="guc-track-wal-io-timing"/> is enabled
+        and <varname>object</varname> is <literal>wal</literal>,
+        otherwise zero)
        </para>
       </entry>
      </row>
@@ -2887,7 +2904,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
        </para>
        <para>
         Time spent in fsync operations in milliseconds (if
-        <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+        <xref linkend="guc-track-io-timing"/> is enabled and
+        <varname>object</varname> is not <literal>wal</literal>,
+        or if <xref linkend="guc-track-wal-io-timing"/> is enabled
+        and <varname>object</varname> is <literal>wal</literal>,
+        otherwise zero)
        </para>
       </entry>
      </row>
diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml
index 4fc09761115e..f3b86b26be90 100644
--- a/doc/src/sgml/wal.sgml
+++ b/doc/src/sgml/wal.sgml
@@ -808,7 +808,7 @@
   <para>
    There are two internal functions to write WAL data to disk:
    <function>XLogWrite</function> and <function>issue_xlog_fsync</function>.
-   When <xref linkend="guc-track-io-timing"/> is enabled, the total
+   When <xref linkend="guc-track-wal-io-timing"/> is enabled, the total
    amounts of time <function>XLogWrite</function> writes and
    <function>issue_xlog_fsync</function> syncs WAL data to disk are counted as
    <varname>write_time</varname> and <varname>fsync_time</varname> in
@@ -828,7 +828,7 @@
    <literal>fsync</literal>, or <literal>fsync_writethrough</literal>,
    the write operation moves WAL buffers to kernel cache and
    <function>issue_xlog_fsync</function> syncs them to disk. Regardless
-   of the setting of <varname>track_io_timing</varname>, the number
+   of the setting of <varname>track_wal_io_timing</varname>, the number
    of times <function>XLogWrite</function> writes and
    <function>issue_xlog_fsync</function> syncs WAL data to disk are also
    counted as <varname>writes</varname> and <varname>fsyncs</varname>
-- 
2.47.2

Attachment: signature.asc
Description: PGP signature

Reply via email to