Attached is a rebased version in light of 8aaa04b32d

- Melanie
From 789d4bf1fb749a26523dbcd2c69795916b711c68 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Tue, 21 Mar 2023 16:00:55 -0400
Subject: [PATCH v8 1/4] Count IO time for temp relation writes

Both pgstat_database and pgBufferUsage write times failed to count
timing for flushes of dirty local buffers when acquiring a new local
buffer for a temporary relation block.
---
 src/backend/storage/buffer/localbuf.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 6f9e7eda57..ecccb6c1a9 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -114,6 +114,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	LocalBufferLookupEnt *hresult;
 	BufferDesc *bufHdr;
 	int			b;
+	instr_time	io_start,
+				io_time;
 	int			trycounter;
 	bool		found;
 	uint32		buf_state;
@@ -220,6 +222,11 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 
 		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
+		if (track_io_timing)
+			INSTR_TIME_SET_CURRENT(io_start);
+		else
+			INSTR_TIME_SET_ZERO(io_start);
+
 		/* And write... */
 		smgrwrite(oreln,
 				  BufTagGetForkNum(&bufHdr->tag),
@@ -233,6 +240,15 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 
 		/* Temporary table I/O does not use Buffer Access Strategies */
 		pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
+
+		if (track_io_timing)
+		{
+			INSTR_TIME_SET_CURRENT(io_time);
+			INSTR_TIME_SUBTRACT(io_time, io_start);
+			pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
+			INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
+		}
+
 		pgBufferUsage.local_blks_written++;
 	}
 
-- 
2.37.2

From 726ab546a11707baa167106d5b6266451dfae445 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Tue, 21 Mar 2023 20:36:10 -0400
Subject: [PATCH v8 4/4] pgstat_database uses pgstat_io time counters

Use pgstat_io's pending counters to increment pgStatBlockWriteTime and
pgStatBlockReadTime.
---
 src/backend/utils/activity/pgstat_io.c | 14 ++++++++------
 src/include/pgstat.h                   |  4 ----
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index 4e98c4749a..905566decd 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -124,13 +124,11 @@ pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
 
 		if (io_op == IOOP_WRITE)
 		{
-			pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
 			if (io_object == IOOBJECT_RELATION)
 				INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
 		}
 		else if (io_op == IOOP_READ)
 		{
-			pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
 			if (io_object == IOOBJECT_RELATION)
 				INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
 		}
@@ -181,15 +179,19 @@ pgstat_flush_io(bool nowait)
 		{
 			for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
 			{
-				instr_time	time;
+				PgStat_Counter time;
 
 				bktype_shstats->counts[io_object][io_context][io_op] +=
 					PendingIOStats.counts[io_object][io_context][io_op];
 
-				time = PendingIOStats.pending_times[io_object][io_context][io_op];
+				time = INSTR_TIME_GET_MICROSEC(PendingIOStats.pending_times[io_object][io_context][io_op]);
 
-				bktype_shstats->times[io_object][io_context][io_op] +=
-					INSTR_TIME_GET_MICROSEC(time);
+				bktype_shstats->times[io_object][io_context][io_op] += time;
+
+				if (io_op == IOOP_WRITE)
+					pgStatBlockWriteTime += time;
+				else if (io_op == IOOP_READ)
+					pgStatBlockReadTime += time;
 			}
 		}
 	}
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index bf54c6defe..833476a2bb 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -544,10 +544,6 @@ extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount);
 extern void pgstat_report_checksum_failure(void);
 extern void pgstat_report_connect(Oid dboid);
 
-#define pgstat_count_buffer_read_time(n)							\
-	(pgStatBlockReadTime += (n))
-#define pgstat_count_buffer_write_time(n)							\
-	(pgStatBlockWriteTime += (n))
 #define pgstat_count_conn_active_time(n)							\
 	(pgStatActiveTime += (n))
 #define pgstat_count_conn_txn_idle_time(n)							\
-- 
2.37.2

From 2bdad725133395ded199ecc726096e052d6e654b Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Fri, 31 Mar 2023 15:32:36 -0400
Subject: [PATCH v8 3/4] Track IO times in pg_stat_io

Add IO timing for reads, writes, extends, and fsyncs to pg_stat_io.

Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Reviewed-by: Andres Freund <and...@anarazel.de>
Discussion: https://www.postgresql.org/message-id/flat/CAAKRu_ay5iKmnbXZ3DsauViF3eMxu4m1oNnJXqV_HyqYeg55Ww%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml           |  59 ++++++++++
 src/backend/catalog/system_views.sql   |   4 +
 src/backend/storage/buffer/bufmgr.c    |  67 ++++--------
 src/backend/storage/buffer/localbuf.c  |  21 +---
 src/backend/storage/smgr/md.c          |  31 ++++--
 src/backend/utils/activity/pgstat_io.c | 143 +++++++++++++++++++++----
 src/backend/utils/adt/pgstatfuncs.c    |  48 ++++++++-
 src/include/catalog/pg_proc.dat        |   6 +-
 src/include/pgstat.h                   |   7 +-
 src/test/regress/expected/rules.out    |   6 +-
 10 files changed, 287 insertions(+), 105 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index d5a45f996d..7b17d38887 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3814,6 +3814,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>read_time</structfield> <type>double precision</type>
+       </para>
+       <para>
+        Time spent in read operations in milliseconds (if <xref
+        linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
@@ -3826,6 +3838,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>write_time</structfield> <type>double precision</type>
+       </para>
+       <para>
+        Time spent in write operations in milliseconds (if <xref
+        linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
@@ -3838,6 +3862,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>extend_time</structfield> <type>double precision</type>
+       </para>
+       <para>
+        Time spent in extend operations in milliseconds (if <xref
+        linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
@@ -3913,6 +3949,18 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>fsync_time</structfield> <type>double precision</type>
+       </para>
+       <para>
+        Time spent in fsync operations in milliseconds (if <xref
+        linkend="guc-track-io-timing"/> is enabled, otherwise zero)
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
@@ -3978,6 +4026,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
    </itemizedlist>
   </para>
 
+  <note>
+   <para>
+    Columns tracking I/O time will only be non-zero when <xref
+    linkend="guc-track-io-timing"/> is enabled. The user should be careful when
+    using these columns in combination with their corresponding operations to
+    ensure that <varname>track_io_timing</varname> was enabled for the entire
+    time since the last reset.
+   </para>
+  </note>
+
+
 
  </sect2>
 
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 574cbc2e44..ee2973c0a6 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1125,13 +1125,17 @@ SELECT
        b.io_object,
        b.io_context,
        b.reads,
+       b.read_time,
        b.writes,
+       b.write_time,
        b.extends,
+       b.extend_time,
        b.op_bytes,
        b.hits,
        b.evictions,
        b.reuses,
        b.fsyncs,
+       b.fsync_time,
        b.stats_reset
 FROM pg_stat_get_io() b;
 
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 05e98d5994..ebceff3b11 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -995,12 +995,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	if (isExtend)
 	{
+		instr_time	io_start;
+
 		/* new buffers are zero-filled */
 		MemSet((char *) bufBlock, 0, BLCKSZ);
+
+		io_start = pgstat_prepare_io_time();
+
 		/* don't set checksum for all-zero page */
 		smgrextend(smgr, forkNum, blockNum, bufBlock, false);
 
-		pgstat_count_io_op(io_object, io_context, IOOP_EXTEND);
+		pgstat_count_io_op_time(io_object, io_context,
+								IOOP_EXTEND, io_start);
 
 		/*
 		 * NB: we're *not* doing a ScheduleBufferTagForWriteback here;
@@ -1019,25 +1025,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			MemSet((char *) bufBlock, 0, BLCKSZ);
 		else
 		{
-			instr_time	io_start,
-						io_time;
+			instr_time	io_start;
 
-			if (track_io_timing)
-				INSTR_TIME_SET_CURRENT(io_start);
-			else
-				INSTR_TIME_SET_ZERO(io_start);
+			io_start = pgstat_prepare_io_time();
 
 			smgrread(smgr, forkNum, blockNum, bufBlock);
 
-			pgstat_count_io_op(io_object, io_context, IOOP_READ);
-
-			if (track_io_timing)
-			{
-				INSTR_TIME_SET_CURRENT(io_time);
-				INSTR_TIME_SUBTRACT(io_time, io_start);
-				pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
-				INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
-			}
+			pgstat_count_io_op_time(io_object, io_context,
+									IOOP_READ, io_start);
 
 			/* check for garbage data */
 			if (!PageIsVerifiedExtended((Page) bufBlock, blockNum,
@@ -2853,8 +2848,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 {
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcallback;
-	instr_time	io_start,
-				io_time;
+	instr_time	io_start;
 	Block		bufBlock;
 	char	   *bufToWrite;
 	uint32		buf_state;
@@ -2929,10 +2923,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	 */
 	bufToWrite = PageSetChecksumCopy((Page) bufBlock, buf->tag.blockNum);
 
-	if (track_io_timing)
-		INSTR_TIME_SET_CURRENT(io_start);
-	else
-		INSTR_TIME_SET_ZERO(io_start);
+	io_start = pgstat_prepare_io_time();
 
 	/*
 	 * bufToWrite is either the shared buffer or a copy, as appropriate.
@@ -2961,15 +2952,8 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	 * When a strategy is not in use, the write can only be a "regular" write
 	 * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
 	 */
-	pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
-
-	if (track_io_timing)
-	{
-		INSTR_TIME_SET_CURRENT(io_time);
-		INSTR_TIME_SUBTRACT(io_time, io_start);
-		pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
-		INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
-	}
+	pgstat_count_io_op_time(IOOBJECT_RELATION, io_context,
+							IOOP_WRITE, io_start);
 
 	pgBufferUsage.shared_blks_written++;
 
@@ -3571,14 +3555,13 @@ FlushRelationBuffers(Relation rel)
 {
 	int			i;
 	BufferDesc *bufHdr;
-	instr_time	io_start,
-				io_time;
 
 	if (RelationUsesLocalBuffers(rel))
 	{
 		for (i = 0; i < NLocBuffer; i++)
 		{
 			uint32		buf_state;
+			instr_time	io_start;
 
 			bufHdr = GetLocalBufferDescriptor(i);
 			if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) &&
@@ -3598,10 +3581,7 @@ FlushRelationBuffers(Relation rel)
 
 				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-				if (track_io_timing)
-					INSTR_TIME_SET_CURRENT(io_start);
-				else
-					INSTR_TIME_SET_ZERO(io_start);
+				io_start = pgstat_prepare_io_time();
 
 				smgrwrite(RelationGetSmgr(rel),
 						  BufTagGetForkNum(&bufHdr->tag),
@@ -3609,19 +3589,12 @@ FlushRelationBuffers(Relation rel)
 						  localpage,
 						  false);
 
-
 				buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
 				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
-				pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
-
-				if (track_io_timing)
-				{
-					INSTR_TIME_SET_CURRENT(io_time);
-					INSTR_TIME_SUBTRACT(io_time, io_start);
-					pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
-					INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
-				}
+				pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION,
+										IOCONTEXT_NORMAL, IOOP_WRITE,
+										io_start);
 
 				pgBufferUsage.local_blks_written++;
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index ecccb6c1a9..a70e8dc523 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -114,8 +114,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	LocalBufferLookupEnt *hresult;
 	BufferDesc *bufHdr;
 	int			b;
-	instr_time	io_start,
-				io_time;
 	int			trycounter;
 	bool		found;
 	uint32		buf_state;
@@ -214,6 +212,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	 */
 	if (buf_state & BM_DIRTY)
 	{
+		instr_time	io_start;
 		SMgrRelation oreln;
 		Page		localpage = (char *) LocalBufHdrGetBlock(bufHdr);
 
@@ -222,10 +221,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 
 		PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
-		if (track_io_timing)
-			INSTR_TIME_SET_CURRENT(io_start);
-		else
-			INSTR_TIME_SET_ZERO(io_start);
+		io_start = pgstat_prepare_io_time();
 
 		/* And write... */
 		smgrwrite(oreln,
@@ -238,17 +234,8 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		buf_state &= ~BM_DIRTY;
 		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
-		/* Temporary table I/O does not use Buffer Access Strategies */
-		pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
-
-		if (track_io_timing)
-		{
-			INSTR_TIME_SET_CURRENT(io_time);
-			INSTR_TIME_SUBTRACT(io_time, io_start);
-			pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
-			INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
-		}
-
+		pgstat_count_io_op_time(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL,
+								IOOP_WRITE, io_start);
 		pgBufferUsage.local_blks_written++;
 	}
 
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 352958e1fe..51a9134d57 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1030,6 +1030,19 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 
 	if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
 	{
+		instr_time	io_start;
+
+		ereport(DEBUG1,
+				(errmsg_internal("could not forward fsync request because request queue is full")));
+
+		io_start = pgstat_prepare_io_time();
+
+		if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
+			ereport(data_sync_elevel(ERROR),
+					(errcode_for_file_access(),
+					 errmsg("could not fsync file \"%s\": %m",
+							FilePathName(seg->mdfd_vfd))));
+
 		/*
 		 * We have no way of knowing if the current IOContext is
 		 * IOCONTEXT_NORMAL or IOCONTEXT_[BULKREAD, BULKWRITE, VACUUM] at this
@@ -1041,16 +1054,8 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 		 * IOCONTEXT_NORMAL is likely clearer when investigating the number of
 		 * backend fsyncs.
 		 */
-		pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
-
-		ereport(DEBUG1,
-				(errmsg_internal("could not forward fsync request because request queue is full")));
-
-		if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
-			ereport(data_sync_elevel(ERROR),
-					(errcode_for_file_access(),
-					 errmsg("could not fsync file \"%s\": %m",
-							FilePathName(seg->mdfd_vfd))));
+		pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
+								IOOP_FSYNC, io_start);
 	}
 }
 
@@ -1399,6 +1404,7 @@ int
 mdsyncfiletag(const FileTag *ftag, char *path)
 {
 	SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
+	instr_time	io_start;
 	File		file;
 	bool		need_to_close;
 	int			result,
@@ -1425,6 +1431,8 @@ mdsyncfiletag(const FileTag *ftag, char *path)
 		need_to_close = true;
 	}
 
+	io_start = pgstat_prepare_io_time();
+
 	/* Sync the file. */
 	result = FileSync(file, WAIT_EVENT_DATA_FILE_SYNC);
 	save_errno = errno;
@@ -1432,7 +1440,8 @@ mdsyncfiletag(const FileTag *ftag, char *path)
 	if (need_to_close)
 		FileClose(file);
 
-	pgstat_count_io_op(IOOBJECT_RELATION, IOCONTEXT_NORMAL, IOOP_FSYNC);
+	pgstat_count_io_op_time(IOOBJECT_RELATION, IOCONTEXT_NORMAL,
+							IOOP_FSYNC, io_start);
 
 	errno = save_errno;
 	return result;
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index ae8bb34f78..4e98c4749a 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -16,45 +16,65 @@
 
 #include "postgres.h"
 
+#include "executor/instrument.h"
+#include "storage/bufmgr.h"
 #include "utils/pgstat_internal.h"
 
 
-static PgStat_BktypeIO PendingIOStats;
+typedef struct PgStat_PendingIO
+{
+	PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
+	instr_time	pending_times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
+}			PgStat_PendingIO;
+
+
+static PgStat_PendingIO PendingIOStats;
 bool		have_iostats = false;
 
 
 /*
  * Check that stats have not been counted for any combination of IOObject,
- * IOContext, and IOOp which are not tracked for the passed-in BackendType. The
- * passed-in PgStat_BktypeIO must contain stats from the BackendType specified
- * by the second parameter. Caller is responsible for locking the passed-in
- * PgStat_BktypeIO, if needed.
+ * IOContext, and IOOp which are not tracked for the passed-in BackendType. If
+ * the IOOp is not counted for this combination but IO time is otherwise
+ * tracked for this IOOp, check that IO time has not been counted for this
+ * combination. If stats are tracked for this combination and IO times are
+ * non-zero, counts should be non-zero.
+ *
+ * The passed-in PgStat_BktypeIO must contain stats from the BackendType
+ * specified by the second parameter. Caller is responsible for locking the
+ * passed-in PgStat_BktypeIO, if needed.
  */
 bool
 pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 							 BackendType bktype)
 {
-	bool		bktype_tracked = pgstat_tracks_io_bktype(bktype);
-
 	for (int io_object = 0; io_object < IOOBJECT_NUM_TYPES; io_object++)
 	{
 		for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
 		{
-			/*
-			 * Don't bother trying to skip to the next loop iteration if
-			 * pgstat_tracks_io_object() would return false here. We still
-			 * need to validate that each counter is zero anyway.
-			 */
 			for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
 			{
-				/* No stats, so nothing to validate */
-				if (backend_io->data[io_object][io_context][io_op] == 0)
+				/* we do track it */
+				if (pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
+				{
+					/* ensure that if IO times are non-zero, counts are > 0 */
+					if (backend_io->times[io_object][io_context][io_op] != 0 &&
+						backend_io->counts[io_object][io_context][io_op] <= 0)
+						return false;
+
 					continue;
+				}
 
-				/* There are stats and there shouldn't be */
-				if (!bktype_tracked ||
-					!pgstat_tracks_io_op(bktype, io_object, io_context, io_op))
+				/* we don't track it, and it is not 0 */
+				if (backend_io->counts[io_object][io_context][io_op] != 0)
 					return false;
+
+				/* we don't track this IOOp, so make sure its IO time is zero */
+				if (pgstat_tracks_io_time(io_op) > -1)
+				{
+					if (backend_io->times[io_object][io_context][io_op] != 0)
+						return false;
+				}
 			}
 		}
 	}
@@ -62,6 +82,19 @@ pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io,
 	return true;
 }
 
+instr_time
+pgstat_prepare_io_time(void)
+{
+	instr_time	io_start;
+
+	if (track_io_timing)
+		INSTR_TIME_SET_CURRENT(io_start);
+	else
+		INSTR_TIME_SET_ZERO(io_start);
+
+	return io_start;
+}
+
 void
 pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
 {
@@ -70,11 +103,44 @@ pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op)
 	Assert((unsigned int) io_op < IOOP_NUM_TYPES);
 	Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op));
 
-	PendingIOStats.data[io_object][io_context][io_op]++;
+	PendingIOStats.counts[io_object][io_context][io_op]++;
 
 	have_iostats = true;
 }
 
+/*
+ * Like pgstat_count_io_op() except it also accumulates time.
+ */
+void
+pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op,
+						instr_time start_time)
+{
+	if (track_io_timing)
+	{
+		instr_time	io_time;
+
+		INSTR_TIME_SET_CURRENT(io_time);
+		INSTR_TIME_SUBTRACT(io_time, start_time);
+
+		if (io_op == IOOP_WRITE)
+		{
+			pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
+			if (io_object == IOOBJECT_RELATION)
+				INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
+		}
+		else if (io_op == IOOP_READ)
+		{
+			pgstat_count_buffer_read_time(INSTR_TIME_GET_MICROSEC(io_time));
+			if (io_object == IOOBJECT_RELATION)
+				INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time);
+		}
+
+		INSTR_TIME_ADD(PendingIOStats.pending_times[io_object][io_context][io_op], io_time);
+	}
+
+	pgstat_count_io_op(io_object, io_context, io_op);
+}
+
 PgStat_IO *
 pgstat_fetch_stat_io(void)
 {
@@ -114,8 +180,17 @@ pgstat_flush_io(bool nowait)
 		for (int io_context = 0; io_context < IOCONTEXT_NUM_TYPES; io_context++)
 		{
 			for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
-				bktype_shstats->data[io_object][io_context][io_op] +=
-					PendingIOStats.data[io_object][io_context][io_op];
+			{
+				instr_time	time;
+
+				bktype_shstats->counts[io_object][io_context][io_op] +=
+					PendingIOStats.counts[io_object][io_context][io_op];
+
+				time = PendingIOStats.pending_times[io_object][io_context][io_op];
+
+				bktype_shstats->times[io_object][io_context][io_op] +=
+					INSTR_TIME_GET_MICROSEC(time);
+			}
 		}
 	}
 
@@ -384,3 +459,31 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 
 	return true;
 }
+
+/*
+ * PgStat_BktypeIO->times contains IO times for IOOps. For simplicity this
+ * array has a spot for every IOOp. pgstat_tracks_io_time() is the source of
+ * truth for which IOOps have corresponding IO times.
+ */
+IOOp
+pgstat_tracks_io_time(IOOp io_op)
+{
+	switch (io_op)
+	{
+		case IOOP_READ:
+			return IOOP_READ;
+		case IOOP_WRITE:
+			return IOOP_WRITE;
+		case IOOP_EXTEND:
+			return IOOP_EXTEND;
+		case IOOP_FSYNC:
+			return IOOP_FSYNC;
+		case IOOP_EVICT:
+		case IOOP_HIT:
+		case IOOP_REUSE:
+			return -1;
+	}
+
+	elog(ERROR, "unrecognized IOOp value: %d", io_op);
+	pg_unreachable();
+}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index eec9f3cf9b..60382ac76a 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1256,13 +1256,17 @@ typedef enum io_stat_col
 	IO_COL_IO_OBJECT,
 	IO_COL_IO_CONTEXT,
 	IO_COL_READS,
+	IO_COL_READ_TIME,
 	IO_COL_WRITES,
+	IO_COL_WRITE_TIME,
 	IO_COL_EXTENDS,
+	IO_COL_EXTEND_TIME,
 	IO_COL_CONVERSION,
 	IO_COL_HITS,
 	IO_COL_EVICTIONS,
 	IO_COL_REUSES,
 	IO_COL_FSYNCS,
+	IO_COL_FSYNC_TIME,
 	IO_COL_RESET_TIME,
 	IO_NUM_COLUMNS,
 } io_stat_col;
@@ -1296,6 +1300,28 @@ pgstat_get_io_op_index(IOOp io_op)
 	pg_unreachable();
 }
 
+/*
+ * Get the number of the column containing IO times for the specified IOOp. If
+ * the specified IOOp is one for which IO time is not tracked, return -1. Note
+ * that this function assumes that IO time for an IOOp is displayed in the view
+ * in the column directly after the IOOp counts.
+ */
+static io_stat_col
+pgstat_get_io_time_index(IOOp io_op)
+{
+	if (pgstat_tracks_io_time(io_op) == -1)
+		return -1;
+
+	return pgstat_get_io_op_index(io_op) + 1;
+}
+
+static inline
+PgStat_Counter
+pg_stat_micro_to_millisecs(PgStat_Counter val_microsec)
+{
+	return val_microsec * 0.001;
+}
+
 Datum
 pg_stat_get_io(PG_FUNCTION_ARGS)
 {
@@ -1363,20 +1389,32 @@ pg_stat_get_io(PG_FUNCTION_ARGS)
 
 				for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
 				{
-					int			col_idx = pgstat_get_io_op_index(io_op);
+					PgStat_Counter count = bktype_stats->counts[io_obj][io_context][io_op];
+					int			i = pgstat_get_io_op_index(io_op);
 
 					/*
 					 * Some combinations of BackendType and IOOp, of IOContext
 					 * and IOOp, and of IOObject and IOOp are not tracked. Set
 					 * these cells in the view NULL.
 					 */
-					nulls[col_idx] = !pgstat_tracks_io_op(bktype, io_obj, io_context, io_op);
+					if (pgstat_tracks_io_op(bktype, io_obj, io_context, io_op))
+						values[i] = Int64GetDatum(count);
+					else
+						nulls[i] = true;
+				}
 
-					if (nulls[col_idx])
+				for (int io_op = 0; io_op < IOOP_NUM_TYPES; io_op++)
+				{
+					PgStat_Counter time = bktype_stats->times[io_obj][io_context][io_op];
+					int			i = pgstat_get_io_time_index(io_op);
+
+					if (i == -1)
 						continue;
 
-					values[col_idx] =
-						Int64GetDatum(bktype_stats->data[io_obj][io_context][io_op]);
+					if (!nulls[pgstat_get_io_op_index(io_op)])
+						values[i] = Float8GetDatum(pg_stat_micro_to_millisecs(time));
+					else
+						nulls[i] = true;
 				}
 
 				tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f9f2642201..664a7ab41d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5749,9 +5749,9 @@
   proname => 'pg_stat_get_io', provolatile => 'v',
   prorows => '30', proretset => 't',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
+  proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_object,io_context,reads,read_time,writes,write_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
   prosrc => 'pg_stat_get_io' },
 
 { oid => '1136', descr => 'statistics: information about WAL activity',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 75d258d921..bf54c6defe 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -306,7 +306,8 @@ typedef enum IOOp
 
 typedef struct PgStat_BktypeIO
 {
-	PgStat_Counter data[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
+	PgStat_Counter counts[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
+	PgStat_Counter times[IOOBJECT_NUM_TYPES][IOCONTEXT_NUM_TYPES][IOOP_NUM_TYPES];
 } PgStat_BktypeIO;
 
 typedef struct PgStat_IO
@@ -515,7 +516,10 @@ extern PgStat_CheckpointerStats *pgstat_fetch_stat_checkpointer(void);
 
 extern bool pgstat_bktype_io_stats_valid(PgStat_BktypeIO *context_ops,
 										 BackendType bktype);
+extern instr_time pgstat_prepare_io_time(void);
 extern void pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op);
+extern void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time time);
+
 extern PgStat_IO *pgstat_fetch_stat_io(void);
 extern const char *pgstat_get_io_context_name(IOContext io_context);
 extern const char *pgstat_get_io_object_name(IOObject io_object);
@@ -525,6 +529,7 @@ extern bool pgstat_tracks_io_object(BackendType bktype,
 									IOObject io_object, IOContext io_context);
 extern bool pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 								IOContext io_context, IOOp io_op);
+extern IOOp pgstat_tracks_io_time(IOOp io_op);
 
 
 /*
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index ab1aebfde4..d939d8067e 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1881,15 +1881,19 @@ pg_stat_io| SELECT backend_type,
     io_object,
     io_context,
     reads,
+    read_time,
     writes,
+    write_time,
     extends,
+    extend_time,
     op_bytes,
     hits,
     evictions,
     reuses,
     fsyncs,
+    fsync_time,
     stats_reset
-   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
+   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, read_time, writes, write_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
     d.datname,
-- 
2.37.2

From f4e0db5c833f33b30d4c0b4bebec1096a1745d81 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Tue, 21 Mar 2023 18:20:44 -0400
Subject: [PATCH v8 2/4] FlushRelationBuffers() counts temp relation IO timing

Add pgstat_database and pgBufferUsage IO timing counting to
FlushRelationBuffers() for writes of temporary relations.
---
 src/backend/storage/buffer/bufmgr.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index b3adbbe7d2..05e98d5994 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3571,6 +3571,8 @@ FlushRelationBuffers(Relation rel)
 {
 	int			i;
 	BufferDesc *bufHdr;
+	instr_time	io_start,
+				io_time;
 
 	if (RelationUsesLocalBuffers(rel))
 	{
@@ -3596,17 +3598,33 @@ FlushRelationBuffers(Relation rel)
 
 				PageSetChecksumInplace(localpage, bufHdr->tag.blockNum);
 
+				if (track_io_timing)
+					INSTR_TIME_SET_CURRENT(io_start);
+				else
+					INSTR_TIME_SET_ZERO(io_start);
+
 				smgrwrite(RelationGetSmgr(rel),
 						  BufTagGetForkNum(&bufHdr->tag),
 						  bufHdr->tag.blockNum,
 						  localpage,
 						  false);
 
+
 				buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
 				pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
 				pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
 
+				if (track_io_timing)
+				{
+					INSTR_TIME_SET_CURRENT(io_time);
+					INSTR_TIME_SUBTRACT(io_time, io_start);
+					pgstat_count_buffer_write_time(INSTR_TIME_GET_MICROSEC(io_time));
+					INSTR_TIME_ADD(pgBufferUsage.blk_write_time, io_time);
+				}
+
+				pgBufferUsage.local_blks_written++;
+
 				/* Pop the error context stack */
 				error_context_stack = errcallback.previous;
 			}
-- 
2.37.2

Reply via email to