Hi,

Thanks for the feedback! The new version of the patch is attached.

On Tue, 5 Dec 2023 at 09:16, Michael Paquier <mich...@paquier.xyz> wrote:
>
> -       if (io_op == IOOP_WRITE || io_op == IOOP_EXTEND)
> +       if (io_op == IOOP_EXTEND || io_op == IOOP_WRITE)
>
> Unrelated diff.

Done.

>
> +   if (io_object == IOOBJECT_WAL && io_context == IOCONTEXT_NORMAL &&
> +       io_op == IOOP_FSYNC)
> +       PendingWalStats.wal_sync += cnt;
>
> Nah, I really don't think that adding this dependency within
> pg_stat_io is a good idea.
>
> -   PendingWalStats.wal_sync++;
> +   pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_FSYNC,
> +                           io_start, 1);
>
> This is the only caller where this matters, and the count is always 1.

I reverted that, pgstat_count_io_op_n doesn't count
PendingWalStats.wal_sync now.

>
> +       no_wal_normal_read = bktype == B_AUTOVAC_LAUNCHER ||
> +               bktype == B_AUTOVAC_WORKER || bktype == B_BACKEND ||
> +               bktype == B_BG_WORKER || bktype == B_BG_WRITER ||
> +               bktype == B_CHECKPOINTER || bktype == B_WAL_RECEIVER ||
> +               bktype == B_WAL_SENDER || bktype == B_WAL_WRITER;
> +
> +       if (no_wal_normal_read &&
> +               (io_object == IOOBJECT_WAL &&
> +                io_op == IOOP_READ))
> +               return false;
>
> This may be more readable if an enum is applied, without a default
> clause so as it would not be forgotten if a new type is added, perhaps
> in its own little routine.

Done.

>
> -   if (track_io_timing)
> +   if (track_io_timing || track_wal_io_timing)
>         INSTR_TIME_SET_CURRENT(io_start);
>     else
>
> This interface from pgstat_prepare_io_time() is not really good,
> because we could finish by setting io_start in the existing code paths
> calling this routine even if track_io_timing is false when
> track_wal_io_timing is true.  Why not changing this interface a bit
> and pass down a GUC (track_io_timing or track_wal_io_timing) as an
> argument of the function depending on what we expect to trigger the
> timings?

Done in 0001.

>
> -       /* Convert counters from microsec to millisec for display */
> -       values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 
> 1000.0);
> -       values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 
> 1000.0);
> +       /*
> +        * There is no need to calculate timings for both pg_stat_wal and
> +        * pg_stat_io. So, fetch timings from pg_stat_io to make stats 
> gathering
> +        * cheaper. Note that, since timings are fetched from pg_stat_io;
> +        * pg_stat_reset_shared('io') will reset pg_stat_wal's timings too.
> +        *
> +        * Convert counters from microsec to millisec for display
> +        */
> +       values[6] = Float8GetDatum(pg_stat_get_io_time(IOOBJECT_WAL,
> +                                                                             
>                      IOCONTEXT_NORMAL,
> +                                                                             
>                      IOOP_WRITE));
> +       values[7] = Float8GetDatum(pg_stat_get_io_time(IOOBJECT_WAL,
> +                                                                             
>                      IOCONTEXT_NORMAL,
> +                                                                             
>                      IOOP_FSYNC));
>
> Perhaps it is simpler to remove these columns from pg_stat_get_wal()
> and plug an SQL upgrade to the view definition of pg_stat_wal?

Done in 0003 but I am not sure if that is what you expected.

> Finding a good balance between the subroutines, the two GUCs, the
> contexts, the I/O operation type and the objects is the tricky part of
> this patch.  If the dependency to PendingWalStats is removed and if
> the interface of pgstat_prepare_io_time is improved, things are a bit
> cleaner, but it feels like we could do more..  Nya.

I agree. The patch is not logically complicated but it is hard to
select the best way.

Any kind of feedback would be appreciated.

--
Regards,
Nazir Bilal Yavuz
Microsoft
From b7bf7b92fa274775136314ecfde90fa32ed435cb Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Wed, 29 Nov 2023 15:30:03 +0300
Subject: [PATCH v6 6/6] Add IOOBJECT_WAL / IOCONTEXT_NORMAL / read tests

---
 src/test/regress/expected/stats.out | 12 ++++++++++++
 src/test/regress/sql/stats.sql      |  8 ++++++++
 2 files changed, 20 insertions(+)

diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 4adda9e479..7f5340cd7e 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -881,6 +881,18 @@ SELECT current_setting('fsync') = 'off'
  t
 (1 row)
 
+-- Test pg_stat_io IOOBJECT_WAL / IOCONTEXT_NORMAL / read.
+-- When the servers starts, StartupXLOG function must be called by postmaster
+-- or standalone-backend startup and WAL read must be done.
+-- So, check these before stats get resetted.
+SELECT SUM(reads) > 0
+  FROM pg_stat_io
+  WHERE context = 'normal' and object = 'wal';
+ ?column? 
+----------
+ t
+(1 row)
+
 -----
 -- Test that resetting stats works for reset timestamp
 -----
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 72e864a0d2..b13f17f4d2 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -452,6 +452,14 @@ SELECT :io_sum_wal_init_writes > 0;
 SELECT current_setting('fsync') = 'off'
   OR :io_sum_wal_init_fsyncs > 0;
 
+-- Test pg_stat_io IOOBJECT_WAL / IOCONTEXT_NORMAL / read.
+-- When the servers starts, StartupXLOG function must be called by postmaster
+-- or standalone-backend startup and WAL read must be done.
+-- So, check these before stats get resetted.
+SELECT SUM(reads) > 0
+  FROM pg_stat_io
+  WHERE context = 'normal' and object = 'wal';
+
 -----
 -- Test that resetting stats works for reset timestamp
 -----
-- 
2.43.0

From afcc55a01f38ab79d7dd17d68e7b1d5c4be41d8d Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 12 Dec 2023 11:17:42 +0300
Subject: [PATCH v6 3/6] Fetch pg_stat_wal's timings from pg_stat_io

For the not calculating WAL timings on pg_stat_wal and pg_stat_io view,
pg_stat_wal view's WAL timings are fetched from pg_stat_io. Since these
timings are fetched from pg_stat_io, pg_stat_reset_shared('io') will
reset pg_stat_wal's timings too.
---
 doc/src/sgml/monitoring.sgml         |  9 ++++++---
 src/backend/catalog/system_views.sql | 15 ++++++++++++---
 src/backend/utils/adt/pgstatfuncs.c  | 14 +++-----------
 src/include/catalog/pg_proc.dat      |  6 +++---
 src/test/regress/expected/rules.out  | 27 +++++++++++++++++----------
 5 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 0450f91ccb..679c527f46 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -4753,7 +4753,9 @@ description | Waiting for a newly initialized WAL file to reach durable storage
         <listitem>
          <para>
           <literal>io</literal>: Reset all the counters shown in the
-          <structname>pg_stat_io</structname> view.
+          <structname>pg_stat_io</structname> view. Note that, this will
+          reset <structname>pg_stat_wal</structname> view's timing counters
+          too.
          </para>
         </listitem>
         <listitem>
@@ -4770,8 +4772,9 @@ description | Waiting for a newly initialized WAL file to reach durable storage
         </listitem>
         <listitem>
          <para>
-          <literal>wal</literal>: Reset all the counters shown in the
-          <structname>pg_stat_wal</structname> view.
+          <literal>wal</literal>: Reset all the counters except timings shown
+          in the <structname>pg_stat_wal</structname> view. These timing
+          counters can be reset by calling pg_stat_reset_shared with 'io'.
          </para>
         </listitem>
         <listitem>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 11d18ed9dd..91c6acf754 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1169,6 +1169,13 @@ SELECT
 FROM pg_stat_get_io() b;
 
 CREATE VIEW pg_stat_wal AS
+    WITH pgsio_sum_write_fsync_time AS (
+        SELECT
+            sum(write_time) as sum_write_time,
+            sum(fsync_time) as sum_fsync_time
+        FROM pg_stat_io
+        WHERE context = 'normal' and object = 'wal'
+    )
     SELECT
         w.wal_records,
         w.wal_fpi,
@@ -1176,10 +1183,12 @@ CREATE VIEW pg_stat_wal AS
         w.wal_buffers_full,
         w.wal_write,
         w.wal_sync,
-        w.wal_write_time,
-        w.wal_sync_time,
+        p.sum_write_time as write_time,
+        p.sum_fsync_time as fsync_time,
         w.stats_reset
-    FROM pg_stat_get_wal() w;
+    FROM pg_stat_get_wal() w
+    CROSS JOIN pgsio_sum_write_fsync_time p;
+
 
 CREATE VIEW pg_stat_progress_analyze AS
     SELECT
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 8d14a4183c..96e318bf69 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1452,7 +1452,7 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_wal(PG_FUNCTION_ARGS)
 {
-#define PG_STAT_GET_WAL_COLS	9
+#define PG_STAT_GET_WAL_COLS	7
 	TupleDesc	tupdesc;
 	Datum		values[PG_STAT_GET_WAL_COLS] = {0};
 	bool		nulls[PG_STAT_GET_WAL_COLS] = {0};
@@ -1473,11 +1473,7 @@ pg_stat_get_wal(PG_FUNCTION_ARGS)
 					   INT8OID, -1, 0);
 	TupleDescInitEntry(tupdesc, (AttrNumber) 6, "wal_sync",
 					   INT8OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "wal_write_time",
-					   FLOAT8OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 8, "wal_sync_time",
-					   FLOAT8OID, -1, 0);
-	TupleDescInitEntry(tupdesc, (AttrNumber) 9, "stats_reset",
+	TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset",
 					   TIMESTAMPTZOID, -1, 0);
 
 	BlessTupleDesc(tupdesc);
@@ -1500,11 +1496,7 @@ pg_stat_get_wal(PG_FUNCTION_ARGS)
 	values[4] = Int64GetDatum(wal_stats->wal_write);
 	values[5] = Int64GetDatum(wal_stats->wal_sync);
 
-	/* Convert counters from microsec to millisec for display */
-	values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 1000.0);
-	values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 1000.0);
-
-	values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp);
+	values[6] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp);
 
 	/* Returns the record as Datum */
 	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 77e8b13764..38050cbb09 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5770,9 +5770,9 @@
 { oid => '1136', descr => 'statistics: information about WAL activity',
   proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{int8,int8,numeric,int8,int8,int8,float8,float8,timestamptz}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o}',
-  proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,wal_write_time,wal_sync_time,stats_reset}',
+  proallargtypes => '{int8,int8,numeric,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o}',
+  proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,wal_write,wal_sync,stats_reset}',
   prosrc => 'pg_stat_get_wal' },
 { oid => '6248', descr => 'statistics: information about WAL prefetching',
   proname => 'pg_stat_get_recovery_prefetch', prorows => '1', proretset => 't',
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 05070393b9..d3f6301c9d 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2222,16 +2222,23 @@ pg_stat_user_tables| SELECT relid,
     autoanalyze_count
    FROM pg_stat_all_tables
   WHERE ((schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (schemaname !~ '^pg_toast'::text));
-pg_stat_wal| SELECT wal_records,
-    wal_fpi,
-    wal_bytes,
-    wal_buffers_full,
-    wal_write,
-    wal_sync,
-    wal_write_time,
-    wal_sync_time,
-    stats_reset
-   FROM pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, wal_write, wal_sync, wal_write_time, wal_sync_time, stats_reset);
+pg_stat_wal| WITH pgsio_sum_write_fsync_time AS (
+         SELECT sum(pg_stat_io.write_time) AS sum_write_time,
+            sum(pg_stat_io.fsync_time) AS sum_fsync_time
+           FROM pg_stat_io
+          WHERE ((pg_stat_io.context = 'normal'::text) AND (pg_stat_io.object = 'wal'::text))
+        )
+ SELECT w.wal_records,
+    w.wal_fpi,
+    w.wal_bytes,
+    w.wal_buffers_full,
+    w.wal_write,
+    w.wal_sync,
+    p.sum_write_time AS write_time,
+    p.sum_fsync_time AS fsync_time,
+    w.stats_reset
+   FROM (pg_stat_get_wal() w(wal_records, wal_fpi, wal_bytes, wal_buffers_full, wal_write, wal_sync, stats_reset)
+     CROSS JOIN pgsio_sum_write_fsync_time p);
 pg_stat_wal_receiver| SELECT pid,
     status,
     receive_start_lsn,
-- 
2.43.0

From 6367c92c957b05ad4a098c35e0df9ef51b2126c6 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Fri, 1 Dec 2023 10:03:21 +0300
Subject: [PATCH v6 5/6] Add IOOBJECT_WAL / IOCONTEXT_INIT / write and fsync
 tests

---
 src/test/regress/expected/stats.out | 19 +++++++++++++++++++
 src/test/regress/sql/stats.sql      | 10 ++++++++++
 2 files changed, 29 insertions(+)

diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 4d3a515bdd..4adda9e479 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -862,6 +862,25 @@ WHERE pg_stat_get_backend_pid(beid) = pg_backend_pid();
  t
 (1 row)
 
+-- Test pg_stat_io IOOBJECT_WAL / IOCONTEXT_INIT / write and fsync.
+-- When the servers starts, the initial WAL file must be created,
+-- so check these stats before stats get resetted.
+SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
+  FROM pg_stat_io
+  WHERE context = 'init' and object = 'wal' \gset io_sum_wal_init_
+SELECT :io_sum_wal_init_writes > 0;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT current_setting('fsync') = 'off'
+  OR :io_sum_wal_init_fsyncs > 0;
+ ?column? 
+----------
+ t
+(1 row)
+
 -----
 -- Test that resetting stats works for reset timestamp
 -----
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index aa48e65dc8..72e864a0d2 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -442,6 +442,16 @@ SELECT (current_schemas(true))[1] = ('pg_temp_' || beid::text) AS match
 FROM pg_stat_get_backend_idset() beid
 WHERE pg_stat_get_backend_pid(beid) = pg_backend_pid();
 
+-- Test pg_stat_io IOOBJECT_WAL / IOCONTEXT_INIT / write and fsync.
+-- When the servers starts, the initial WAL file must be created,
+-- so check these stats before stats get resetted.
+SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
+  FROM pg_stat_io
+  WHERE context = 'init' and object = 'wal' \gset io_sum_wal_init_
+SELECT :io_sum_wal_init_writes > 0;
+SELECT current_setting('fsync') = 'off'
+  OR :io_sum_wal_init_fsyncs > 0;
+
 -----
 -- Test that resetting stats works for reset timestamp
 -----
-- 
2.43.0

From c328c9b8862e600328b4d91592031c393cc27464 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Fri, 10 Nov 2023 14:52:22 +0300
Subject: [PATCH v6 4/6] Add IOOBJECT_WAL / IOCONTEXT_NORMAL / write and fsync
 tests

---
 src/test/regress/expected/stats.out | 26 ++++++++++++++++++++++++++
 src/test/regress/sql/stats.sql      | 11 +++++++++++
 2 files changed, 37 insertions(+)

diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 346e10a3d2..4d3a515bdd 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -1255,6 +1255,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
 -- - shared buffer hits
+-- - WAL writes and fsyncs in IOContext IOCONTEXT_NORMAL
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
 -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
@@ -1264,6 +1265,9 @@ SELECT sum(extends) AS io_sum_shared_before_extends
 SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
   FROM pg_stat_io
   WHERE object = 'relation' \gset io_sum_shared_before_
+SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
+  FROM pg_stat_io
+  WHERE context = 'normal' and object = 'wal' \gset io_sum_wal_normal_before_
 CREATE TABLE test_io_shared(a int);
 INSERT INTO test_io_shared SELECT i FROM generate_series(1,100)i;
 SELECT pg_stat_force_next_flush();
@@ -1301,6 +1305,28 @@ SELECT current_setting('fsync') = 'off'
  t
 (1 row)
 
+SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
+  FROM pg_stat_io
+  WHERE context = 'normal' and object = 'wal' \gset io_sum_wal_normal_after_
+SELECT current_setting('synchronous_commit') = 'on';
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT :io_sum_wal_normal_after_writes > :io_sum_wal_normal_before_writes;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT current_setting('fsync') = 'off'
+  OR :io_sum_wal_normal_after_fsyncs > :io_sum_wal_normal_before_fsyncs;
+ ?column? 
+----------
+ t
+(1 row)
+
 -- Change the tablespace so that the table is rewritten directly, then SELECT
 -- from it to cause it to be read back into shared buffers.
 SELECT sum(reads) AS io_sum_shared_before_reads
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index e3b4ca96e8..aa48e65dc8 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -601,6 +601,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
 -- - shared buffer hits
+-- - WAL writes and fsyncs in IOContext IOCONTEXT_NORMAL
 
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
@@ -612,6 +613,9 @@ SELECT sum(extends) AS io_sum_shared_before_extends
 SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
   FROM pg_stat_io
   WHERE object = 'relation' \gset io_sum_shared_before_
+SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
+  FROM pg_stat_io
+  WHERE context = 'normal' and object = 'wal' \gset io_sum_wal_normal_before_
 CREATE TABLE test_io_shared(a int);
 INSERT INTO test_io_shared SELECT i FROM generate_series(1,100)i;
 SELECT pg_stat_force_next_flush();
@@ -630,6 +634,13 @@ SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
 SELECT :io_sum_shared_after_writes > :io_sum_shared_before_writes;
 SELECT current_setting('fsync') = 'off'
   OR :io_sum_shared_after_fsyncs > :io_sum_shared_before_fsyncs;
+SELECT sum(writes) AS writes, sum(fsyncs) AS fsyncs
+  FROM pg_stat_io
+  WHERE context = 'normal' and object = 'wal' \gset io_sum_wal_normal_after_
+SELECT current_setting('synchronous_commit') = 'on';
+SELECT :io_sum_wal_normal_after_writes > :io_sum_wal_normal_before_writes;
+SELECT current_setting('fsync') = 'off'
+  OR :io_sum_wal_normal_after_fsyncs > :io_sum_wal_normal_before_fsyncs;
 
 -- Change the tablespace so that the table is rewritten directly, then SELECT
 -- from it to cause it to be read back into shared buffers.
-- 
2.43.0

From ee53af3bfaf4582a940c0b2bd93f5b98739ef423 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 12 Dec 2023 10:40:02 +0300
Subject: [PATCH v6 2/6] Show WAL stats on pg_stat_io (except streaming
 replication)

This patch aims to showing WAL stats per backend on pg_stat_io view.

With this patch, it can be seen how many WAL operations it makes, their
context, types and total timings per backend in pg_stat_io view.

In this path new IOContext IOCONTEXT_INIT is introduced, it is for IO
operations done while creating the things. Currently, it is used only
together with IOObject IOOBJECT_WAL.

IOOBJECT_WAL means IO operations related to WAL.
IOOBJECT_WAL / IOCONTEXT_NORMAL means IO operations done on already
created WAL segments.
IOOBJECT_WAL / IOCONTEXT_INIT means IO operations done while creating
the WAL segments.

This patch currently covers:
- Documentation
- IOOBJECT_WAL / IOCONTEXT_NORMAL / read, write and fsync stats on
  pg_stat_io.
- IOOBJECT_WAL / IOCONTEXT_INIT / write and fsync stats on pg_stat_io.

doesn't cover:
- Streaming replication WAL IO.
---
 doc/src/sgml/monitoring.sgml              |  19 +++-
 src/backend/access/transam/xlog.c         |  58 ++++------
 src/backend/access/transam/xlogrecovery.c |  10 ++
 src/backend/utils/activity/pgstat_io.c    | 124 +++++++++++++++++++++-
 src/backend/utils/adt/pgstatfuncs.c       |  10 +-
 src/include/pgstat.h                      |  10 +-
 6 files changed, 182 insertions(+), 49 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 42509042ad..0450f91ccb 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2499,9 +2499,10 @@ description | Waiting for a newly initialized WAL file to reach durable storage
   </para>
 
   <para>
-   Currently, I/O on relations (e.g. tables, indexes) is tracked. However,
-   relation I/O which bypasses shared buffers (e.g. when moving a table from one
-   tablespace to another) is currently not tracked.
+   Currently, I/O on relations (e.g. tables, indexes) and WAL activities are
+   tracked. However, relation I/O which bypasses shared buffers
+   (e.g. when moving a table from one tablespace to another) is currently
+   not tracked.
   </para>
 
   <table id="pg-stat-io-view" xreflabel="pg_stat_io">
@@ -2554,6 +2555,11 @@ description | Waiting for a newly initialized WAL file to reach durable storage
           <literal>temp relation</literal>: Temporary relations.
          </para>
         </listitem>
+        <listitem>
+         <para>
+          <literal>wal</literal>: Write Ahead Logs.
+         </para>
+        </listitem>
        </itemizedlist>
        </para>
       </entry>
@@ -2578,6 +2584,13 @@ description | Waiting for a newly initialized WAL file to reach durable storage
           <literal>normal</literal>.
          </para>
         </listitem>
+        <listitem>
+         <para>
+          <literal>init</literal>: I/O operations performed while creating the
+          WAL segments are tracked in <varname>context</varname>
+          <literal>init</literal>.
+         </para>
+        </listitem>
         <listitem>
          <para>
           <literal>vacuum</literal>: I/O operations performed outside of shared
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 01e0484584..6f7149084f 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2277,38 +2277,22 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 			Size		nbytes;
 			Size		nleft;
 			int			written;
-			instr_time	start;
+			instr_time	io_start;
 
 			/* OK to write the page(s) */
 			from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
 			nbytes = npages * (Size) XLOG_BLCKSZ;
 			nleft = nbytes;
+
+			io_start = pgstat_prepare_io_time(track_wal_io_timing);
 			do
 			{
 				errno = 0;
 
-				/* Measure I/O timing to write WAL data */
-				if (track_wal_io_timing)
-					INSTR_TIME_SET_CURRENT(start);
-				else
-					INSTR_TIME_SET_ZERO(start);
-
 				pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
 				written = pg_pwrite(openLogFile, from, nleft, startoffset);
 				pgstat_report_wait_end();
 
-				/*
-				 * Increment the I/O timing and the number of times WAL data
-				 * were written out to disk.
-				 */
-				if (track_wal_io_timing)
-				{
-					instr_time	end;
-
-					INSTR_TIME_SET_CURRENT(end);
-					INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_write_time, end, start);
-				}
-
 				PendingWalStats.wal_write++;
 
 				if (written <= 0)
@@ -2333,6 +2317,9 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 				startoffset += written;
 			} while (nleft > 0);
 
+			pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
+									IOOP_WRITE, io_start, npages);
+
 			npages = 0;
 
 			/*
@@ -3039,6 +3026,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	int			fd;
 	int			save_errno;
 	int			open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
+	instr_time	io_start;
 
 	Assert(logtli != 0);
 
@@ -3082,6 +3070,9 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 				(errcode_for_file_access(),
 				 errmsg("could not create file \"%s\": %m", tmppath)));
 
+	/* start timing writes for stats */
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
+
 	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
 	save_errno = 0;
 	if (wal_init_zero)
@@ -3117,6 +3108,9 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	}
 	pgstat_report_wait_end();
 
+	pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE,
+							io_start, 1);
+
 	if (save_errno)
 	{
 		/*
@@ -3133,6 +3127,9 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 				 errmsg("could not write to file \"%s\": %m", tmppath)));
 	}
 
+	/* start timing fsyncs for stats */
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
+
 	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
 	if (pg_fsync(fd) != 0)
 	{
@@ -3145,6 +3142,9 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
 	}
 	pgstat_report_wait_end();
 
+	pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT,
+							IOOP_FSYNC, io_start, 1);
+
 	if (close(fd) != 0)
 		ereport(ERROR,
 				(errcode_for_file_access(),
@@ -8317,7 +8317,7 @@ void
 issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 {
 	char	   *msg = NULL;
-	instr_time	start;
+	instr_time	io_start;
 
 	Assert(tli != 0);
 
@@ -8330,11 +8330,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 		wal_sync_method == WAL_SYNC_METHOD_OPEN_DSYNC)
 		return;
 
-	/* Measure I/O timing to sync the WAL file */
-	if (track_wal_io_timing)
-		INSTR_TIME_SET_CURRENT(start);
-	else
-		INSTR_TIME_SET_ZERO(start);
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
 	switch (wal_sync_method)
@@ -8378,16 +8374,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
 
 	pgstat_report_wait_end();
 
-	/*
-	 * Increment the I/O timing and the number of times WAL files were synced.
-	 */
-	if (track_wal_io_timing)
-	{
-		instr_time	end;
-
-		INSTR_TIME_SET_CURRENT(end);
-		INSTR_TIME_ACCUM_DIFF(PendingWalStats.wal_sync_time, end, start);
-	}
+	pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_FSYNC,
+							io_start, 1);
 
 	PendingWalStats.wal_sync++;
 }
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index a2c8fa3981..16c1b9ba99 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -60,6 +60,7 @@
 #include "utils/builtins.h"
 #include "utils/datetime.h"
 #include "utils/guc_hooks.h"
+#include "utils/pgstat_internal.h"
 #include "utils/pg_lsn.h"
 #include "utils/ps_status.h"
 #include "utils/pg_rusage.h"
@@ -1771,6 +1772,9 @@ PerformWalRecovery(void)
 			 */
 			ApplyWalRecord(xlogreader, record, &replayTLI);
 
+			/* Report pending statistics to the cumulative stats system  */
+			pgstat_flush_io(false);
+
 			/* Exit loop if we reached inclusive recovery target */
 			if (recoveryStopsAfter(xlogreader))
 			{
@@ -3246,6 +3250,7 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
 	uint32		targetPageOff;
 	XLogSegNo	targetSegNo PG_USED_FOR_ASSERTS_ONLY;
 	int			r;
+	instr_time	io_start;
 
 	XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
 	targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
@@ -3338,6 +3343,8 @@ retry:
 	/* Read the requested page */
 	readOff = targetPageOff;
 
+	io_start = pgstat_prepare_io_time(track_wal_io_timing);
+
 	pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
 	r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
 	if (r != XLOG_BLCKSZ)
@@ -3366,6 +3373,9 @@ retry:
 	}
 	pgstat_report_wait_end();
 
+	pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL, IOOP_READ,
+							io_start, 1);
+
 	Assert(targetSegNo == readSegNo);
 	Assert(targetPageOff == readOff);
 	Assert(reqLen <= readLen);
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index 7263a80c72..36bf5ce708 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -18,6 +18,7 @@
 
 #include "executor/instrument.h"
 #include "storage/bufmgr.h"
+#include "access/xlog.h"
 #include "utils/pgstat_internal.h"
 
 
@@ -114,6 +115,22 @@ pgstat_prepare_io_time(bool track_time_guc)
 	return io_start;
 }
 
+/*
+ * Decide if the io timing needs be tracked.
+ */
+bool
+pgstat_should_track_io_time(IOObject io_object, IOContext io_context)
+{
+	/*
+	 * io times of IOOBJECT_WAL IOObject needs to be tracked when
+	 * 'track_wal_io_timing' is set regardless of 'track_io_timing'.
+	 */
+	if (io_object == IOOBJECT_WAL)
+		return track_wal_io_timing;
+
+	return track_io_timing;
+}
+
 /*
  * Like pgstat_count_io_op_n() except it also accumulates time.
  */
@@ -121,7 +138,27 @@ void
 pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
 						instr_time start_time, uint32 cnt)
 {
-	if (track_io_timing)
+	/*
+	 * Accumulate different type of times here. We want to eventually
+	 * deduplicate these counters, so we are consolidating them first. This
+	 * also makes it easy to compare what is tracked for which stats or
+	 * instrumentation purpose.
+	 *
+	 * Some of the IO counters didn't moved here because they track at a
+	 * different level of granularity or at a different point in the call
+	 * stack.
+	 *
+	 * pgstat_count_buffer is for pgstat_database. Since pg_stat_database only
+	 * counts blk_read_time and blk_write_time, it is set for IOOP_READ and
+	 * IOOP_WRITE.
+	 *
+	 * pgBufferUsage is for EXPLAIN. pgBufferUsage has only write and read
+	 * stats for shared/local and temporary blocks. Only shared/local blocks
+	 * are counted here.
+	 *
+	 * At the end of the if case, accumulate time for the pg_stat_io.
+	 */
+	if (pgstat_should_track_io_time(io_object, io_context))
 	{
 		instr_time	io_time;
 
@@ -149,6 +186,7 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
 					   io_time);
 	}
 
+	/* The IO timings are counted, now count the IO numbers */
 	pgstat_count_io_op_n(io_object, io_context, io_op, cnt);
 }
 
@@ -229,12 +267,33 @@ pgstat_get_io_context_name(IOContext io_context)
 			return "normal";
 		case IOCONTEXT_VACUUM:
 			return "vacuum";
+		case IOCONTEXT_INIT:
+			return "init";
 	}
 
 	elog(ERROR, "unrecognized IOContext value: %d", io_context);
 	pg_unreachable();
 }
 
+/*
+ * op_bytes can change according to IOObject and IOContext.
+ * Return BLCKSZ as default because most of the
+ * IOObject / IOContext uses BLCKSZ.
+ */
+int
+pgstat_get_io_op_bytes(IOObject io_object, IOContext io_context)
+{
+	if (io_object == IOOBJECT_WAL)
+	{
+		if (io_context == IOCONTEXT_NORMAL)
+			return XLOG_BLCKSZ;
+		else if (io_context == IOCONTEXT_INIT)
+			return wal_segment_size;
+	}
+
+	return BLCKSZ;
+}
+
 const char *
 pgstat_get_io_object_name(IOObject io_object)
 {
@@ -244,6 +303,8 @@ pgstat_get_io_object_name(IOObject io_object)
 			return "relation";
 		case IOOBJECT_TEMP_RELATION:
 			return "temp relation";
+		case IOOBJECT_WAL:
+			return "wal";
 	}
 
 	elog(ERROR, "unrecognized IOObject value: %d", io_object);
@@ -325,10 +386,10 @@ pgstat_tracks_io_bktype(BackendType bktype)
 		case B_INVALID:
 		case B_ARCHIVER:
 		case B_LOGGER:
-		case B_WAL_RECEIVER:
-		case B_WAL_WRITER:
 			return false;
 
+		case B_WAL_RECEIVER:
+		case B_WAL_WRITER:
 		case B_AUTOVAC_LAUNCHER:
 		case B_AUTOVAC_WORKER:
 		case B_BACKEND:
@@ -363,6 +424,15 @@ pgstat_tracks_io_object(BackendType bktype, IOObject io_object,
 	if (!pgstat_tracks_io_bktype(bktype))
 		return false;
 
+	/*
+	 * Currently, IO on IOOBJECT_WAL IOObject can only occur in the
+	 * IOCONTEXT_NORMAL and IOCONTEXT_INIT IOContext.
+	 */
+	if (io_object == IOOBJECT_WAL &&
+		(io_context != IOCONTEXT_NORMAL &&
+		 io_context != IOCONTEXT_INIT))
+		return false;
+
 	/*
 	 * Currently, IO on temporary relations can only occur in the
 	 * IOCONTEXT_NORMAL IOContext.
@@ -437,6 +507,33 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 		 bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND)
 		return false;
 
+	/*
+	 * Some BackendTypes / IOObjects will not do certain IOOps.
+	 */
+	if (io_object == IOOBJECT_WAL && io_op == IOOP_READ)
+	{
+		switch (bktype)
+		{
+			case B_STANDALONE_BACKEND:
+			case B_STARTUP:
+				break;
+
+			case B_INVALID:
+			case B_ARCHIVER:
+			case B_AUTOVAC_LAUNCHER:
+			case B_AUTOVAC_WORKER:
+			case B_BACKEND:
+			case B_BG_WORKER:
+			case B_BG_WRITER:
+			case B_CHECKPOINTER:
+			case B_LOGGER:
+			case B_WAL_RECEIVER:
+			case B_WAL_SENDER:
+			case B_WAL_WRITER:
+				return false;
+		}
+	}
+
 	/*
 	 * Temporary tables are not logged and thus do not require fsync'ing.
 	 * Writeback is not requested for temporary tables.
@@ -461,6 +558,27 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 	if (!strategy_io_context && io_op == IOOP_REUSE)
 		return false;
 
+	/*
+	 * Some IOOps are not valid in certain IOContexts / IOObjects and some
+	 * IOOps are only valid in certain IOContexts / IOObjects.
+	 */
+
+	/*
+	 * In IOOBJECT_WAL io_object, IOCONTEXT_INIT io_context means operations
+	 * done while creating new WAL segments.
+	 */
+	if (io_object == IOOBJECT_WAL && io_context == IOCONTEXT_INIT &&
+		!(io_op == IOOP_WRITE || io_op == IOOP_FSYNC))
+		return false;
+
+	/*
+	 * In IOOBJECT_WAL io_object, IOCONTEXT_NORMAL io_context means operations
+	 * done on already created WAL segments.
+	 */
+	if (io_object == IOOBJECT_WAL && io_context == IOCONTEXT_NORMAL &&
+		!(io_op == IOOP_WRITE || io_op == IOOP_READ || io_op == IOOP_FSYNC))
+		return false;
+
 	/*
 	 * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are
 	 * counted in the IOCONTEXT_NORMAL IOContext. See comment in
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0cea320c00..8d14a4183c 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1377,6 +1377,7 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
 			for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
 			{
 				const char *context_name = pgstat_get_io_context_name(io_context);
+				int			op_bytes;
 
 				Datum		values[IO_NUM_COLUMNS] = {0};
 				bool		nulls[IO_NUM_COLUMNS] = {0};
@@ -1395,12 +1396,11 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
 				values[IO_COL_RESET_TIME] = TimestampTzGetDatum(reset_time);
 
 				/*
-				 * Hard-code this to the value of BLCKSZ for now. Future
-				 * values could include XLOG_BLCKSZ, once WAL IO is tracked,
-				 * and constant multipliers, once non-block-oriented IO (e.g.
-				 * temporary file IO) is tracked.
+				 * op_bytes can change according to IOObject and IOContext.
+				 * Get the correct op_bytes.
 				 */
-				values[IO_COL_CONVERSION] = Int64GetDatum(BLCKSZ);
+				op_bytes = pgstat_get_io_op_bytes(io_obj, io_context);
+				values[IO_COL_CONVERSION] = Int64GetDatum(op_bytes);
 
 				for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
 				{
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f95d8db0c4..2a3d131dce 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -276,9 +276,10 @@ typedef enum IOObject
 {
 	IOOBJECT_RELATION,
 	IOOBJECT_TEMP_RELATION,
+	IOOBJECT_WAL,
 } IOObject;
 
-#define IOOBJECT_NUM_TYPES (IOOBJECT_TEMP_RELATION + 1)
+#define IOOBJECT_NUM_TYPES (IOOBJECT_WAL + 1)
 
 typedef enum IOContext
 {
@@ -286,9 +287,10 @@ typedef enum IOContext
 	IOCONTEXT_BULKWRITE,
 	IOCONTEXT_NORMAL,
 	IOCONTEXT_VACUUM,
+	IOCONTEXT_INIT,
 } IOContext;
 
-#define IOCONTEXT_NUM_TYPES (IOCONTEXT_VACUUM + 1)
+#define IOCONTEXT_NUM_TYPES (IOCONTEXT_INIT + 1)
 
 typedef enum IOOp
 {
@@ -519,11 +521,13 @@ extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 										 BackendType bktype);
 extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
 extern void pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint32 cnt);
-extern instr_time pgstat_prepare_io_time(void);
+extern instr_time pgstat_prepare_io_time(bool track_time_guc);
+extern bool pgstat_should_track_io_time(IOObject io_object, IOContext io_context);
 extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context,
 									IOOp io_op, instr_time start_time, uint32 cnt);
 
 extern PgStat_IO *pgstat_fetch_stat_io(void);
+extern int	pgstat_get_io_op_bytes(IOObject io_object, IOContext io_context);
 extern const char *pgstat_get_io_context_name(IOContext io_context);
 extern const char *pgstat_get_io_object_name(IOObject io_object);
 
-- 
2.43.0

From e439ddc128c9b9ea1e2832f339e872c27d308499 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavuz81@gmail.com>
Date: Tue, 12 Dec 2023 10:33:55 +0300
Subject: [PATCH v6 1/6] Use timing GUCs on pgstat_prepare_io_time function

---
 src/backend/storage/buffer/bufmgr.c    | 10 +++++-----
 src/backend/storage/buffer/localbuf.c  |  4 ++--
 src/backend/storage/smgr/md.c          |  4 ++--
 src/backend/utils/activity/pgstat_io.c | 13 +++++++++++--
 4 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f7c67d504c..a652907fc9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1143,7 +1143,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		MemSet((char *) bufBlock, 0, BLCKSZ);
 	else
 	{
-		instr_time	io_start = pgstat_prepare_io_time();
+		instr_time	io_start = pgstat_prepare_io_time(track_io_timing);
 
 		smgrread(smgr, forkNum, blockNum, bufBlock);
 
@@ -2070,7 +2070,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 		}
 	}
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/*
 	 * Note: if smgrzeroextend fails, we will end up with buffers that are
@@ -3523,7 +3523,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	 */
 	bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/*
 	 * bufToWrite is either the shared buffer or a copy, as appropriate.
@@ -4181,7 +4181,7 @@ FlushRelationBuffers(Relation rel)
 
 				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-				io_start = pgstat_prepare_io_time();
+				io_start = pgstat_prepare_io_time(track_io_timing);
 
 				smgrwrite(RelationGetSmgr(rel),
 						  BufTagGetForkNum(&bufHdr->tag),
@@ -5614,7 +5614,7 @@ IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context)
 	sort_pending_writebacks(wb_context->pending_writebacks,
 							wb_context->nr_pending);
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/*
 	 * Coalesce neighbouring writes, but nothing else. For that we iterate
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index aebcf146b4..903ea97b85 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -246,7 +246,7 @@ GetLocalVictimBuffer(void)
 
 		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-		io_start = pgstat_prepare_io_time();
+		io_start = pgstat_prepare_io_time(track_io_timing);
 
 		/* And write... */
 		smgrwrite(oreln,
@@ -411,7 +411,7 @@ ExtendBufferedRelLocal(BufferManagerRelation bmr,
 		}
 	}
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/* actually extend relation */
 	smgrzeroextend(bmr.smgr, fork, first_block, extend_by, false);
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index fdecbad170..62b2e79106 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1172,7 +1172,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 		ereport(DEBUG1,
 				(errmsg_internal("could not forward fsync request because request queue is full")));
 
-		io_start = pgstat_prepare_io_time();
+		io_start = pgstat_prepare_io_time(track_io_timing);
 
 		if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
 			ereport(data_sync_elevel(ERROR),
@@ -1569,7 +1569,7 @@ mdsyncfiletag(const FileTag *ftag, char *path)
 		need_to_close = true;
 	}
 
-	io_start = pgstat_prepare_io_time();
+	io_start = pgstat_prepare_io_time(track_io_timing);
 
 	/* Sync the file. */
 	result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index 490d5a9ab7..7263a80c72 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -92,14 +92,23 @@ pgstat_count_io_op_n(IOObject io_object, IOContext io_context, IOOp io_op, uint3
 	have_iostats = true;
 }
 
+/*
+ * Prepares the io_time for pgstat_count_io_op_time() function. This returns
+ * the current time if the 'track_time_guc' is true, otherwise returns zero.
+ */
 instr_time
-pgstat_prepare_io_time(void)
+pgstat_prepare_io_time(bool track_time_guc)
 {
 	instr_time	io_start;
 
-	if (track_io_timing)
+	if (track_time_guc)
 		INSTR_TIME_SET_CURRENT(io_start);
 	else
+
+		/*
+		 * If the time won't be tracked, there is no need to set io_start but
+		 * compiler complains about uninitialized use. So, set it to zero.
+		 */
 		INSTR_TIME_SET_ZERO(io_start);
 
 	return io_start;
-- 
2.43.0

Reply via email to