On Mon, Apr 24, 2023 at 9:29 PM Melanie Plageman
<melanieplage...@gmail.com> wrote:
> I've yet to cook up a client backend test case (e.g. with COPY). I've taken
> that as a todo.

It was trivial to see client backend writebacks in almost any scenario
once I set backend_flush_after. I wonder if it is worth mentioning the
various "*flush_after" gucs in the docs?

> I have a few outstanding questions:
>
> 1) Does it make sense for writebacks to count the number of blocks for
> which writeback was requested or the number of calls to smgrwriteback() or
> the number of actual syscalls made? We don't actually know from outside
> of mdwriteback() how many FileWriteback() calls we will make.

So, in the attached v3, I've kept the first method: writebacks are the
number of blocks which the backend has requested writeback of. I'd like
it to be clear in the docs exactly what writebacks are (so that people
know not to add them together with writes or something like that). I
made an effort but could use further docs review.

> 2) I'm a little nervous about not including IOObject in the writeback
> context. Technically, there is nothing stopping local buffer code from
> calling IssuePendingWritebacks(). Right now, local buffer code doesn't
> do ScheduleBufferTagForWriteback(). But it doesn't seem quite right to
> hardcode in IOOBJECT_RELATION when there is nothing wrong with
> requesting writeback of local buffers (AFAIK). What do you think?

I've gone ahead and added IOObject to the WritebackContext.

- Melanie
From 8db3a8319553777024379add76527b8a6908028b Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Mon, 24 Apr 2023 18:21:54 -0400
Subject: [PATCH v3] Add writeback to pg_stat_io

28e626bde00 added the notion of IOOps but neglected to include
writeback. With the addition of IO timing to pg_stat_io in ac8d53dae5,
the omission of writeback caused some confusion. Checkpointer write
timing in pg_stat_io often differed greatly from the write timing
written to the log. To fix this, add IOOp IOOP_WRITEBACK and track
writebacks and writeback timing in pg_stat_io.

Discussion: https://postgr.es/m/20230419172326.dhgyo4wrrhulovt6%40awork3.anarazel.de
---
 doc/src/sgml/monitoring.sgml          | 27 +++++++++++++++++++++++++++
 src/backend/catalog/system_views.sql  |  2 ++
 src/backend/postmaster/bgwriter.c     | 10 ++++++++--
 src/backend/storage/buffer/buf_init.c | 11 ++++++++---
 src/backend/storage/buffer/bufmgr.c   | 20 ++++++++++++++++++--
 src/backend/utils/adt/pgstatfuncs.c   |  5 +++++
 src/include/catalog/pg_proc.dat       |  6 +++---
 src/include/pgstat.h                  |  3 ++-
 src/include/storage/buf_internals.h   |  6 +++++-
 src/test/regress/expected/rules.out   |  4 +++-
 src/test/regress/expected/stats.out   |  4 ++--
 src/test/regress/sql/stats.sql        |  4 ++--
 12 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 99f7f95c39..d6062b3d89 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3867,6 +3867,33 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>writebacks</structfield> <type>bigint</type>
+       </para>
+       <para>
+        Number of units of size <varname>op_bytes</varname> which the backend
+        requested the kernel write out to permanent storage.
+       </para>
+      </entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>writeback_time</structfield> <type>double precision</type>
+       </para>
+       <para>
+        Time spent in writeback operations in milliseconds (if
+        <xref linkend="guc-track-io-timing"/> is enabled, otherwise zero). This
+        does not necessarily count the time spent by the kernel writing the
+        data out. The backend will initiate write-out of the dirty pages and
+        wait only if the request queue is full.
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 48aacf66ee..d0c932ad0e 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1131,6 +1131,8 @@ SELECT
        b.read_time,
        b.writes,
        b.write_time,
+       b.writebacks,
+       b.writeback_time,
        b.extends,
        b.extend_time,
        b.op_bytes,
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index caad642ec9..dbf866f262 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -129,7 +129,12 @@ BackgroundWriterMain(void)
 											 ALLOCSET_DEFAULT_SIZES);
 	MemoryContextSwitchTo(bgwriter_context);
 
-	WritebackContextInit(&wb_context, &bgwriter_flush_after);
+	/*
+	 * bgwriter will only request writeback for permanent relations in
+	 * IOCONTEXT_NORMAL.
+	 */
+	WritebackContextInit(&wb_context, IOOBJECT_RELATION, IOCONTEXT_NORMAL,
+						 &bgwriter_flush_after);
 
 	/*
 	 * If an exception is encountered, processing resumes here.
@@ -185,7 +190,8 @@ BackgroundWriterMain(void)
 		MemoryContextResetAndDeleteChildren(bgwriter_context);
 
 		/* re-initialize to avoid repeated errors causing problems */
-		WritebackContextInit(&wb_context, &bgwriter_flush_after);
+		WritebackContextInit(&wb_context, IOOBJECT_RELATION, IOCONTEXT_NORMAL,
+							 &bgwriter_flush_after);
 
 		/* Now we can allow interrupts again */
 		RESUME_INTERRUPTS();
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 0057443f0c..b050e3c2aa 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -145,9 +145,14 @@ InitBufferPool(void)
 	/* Init other shared buffer-management stuff */
 	StrategyInitialize(!foundDescs);
 
-	/* Initialize per-backend file flush context */
-	WritebackContextInit(&BackendWritebackContext,
-						 &backend_flush_after);
+	/*
+	 * Initialize per-backend file flush context. IOObject is initialized to
+	 * IOOBJECT_RELATION and IOContext to IOCONTEXT_NORMAL since these are the
+	 * most likely targets for writeback. The backend can overwrite these as
+	 * appropriate.
+	 */
+	WritebackContextInit(&BackendWritebackContext, IOOBJECT_RELATION,
+						 IOCONTEXT_NORMAL, &backend_flush_after);
 }
 
 /*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1fa689052e..116910cdfe 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1685,6 +1685,8 @@ again:
 		FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
 		LWLockRelease(content_lock);
 
+		BackendWritebackContext.io_object = IOOBJECT_RELATION;
+		BackendWritebackContext.io_context = io_context;
 		ScheduleBufferTagForWriteback(&BackendWritebackContext,
 									  &buf_hdr->tag);
 	}
@@ -2555,7 +2557,12 @@ BufferSync(int flags)
 	if (num_to_scan == 0)
 		return;					/* nothing to do */
 
-	WritebackContextInit(&wb_context, &checkpoint_flush_after);
+	/*
+	 * Checkpointer will only request writeback of permanent relations in
+	 * IOCONTEXT_NORMAL.
+	 */
+	WritebackContextInit(&wb_context, IOOBJECT_RELATION, IOCONTEXT_NORMAL,
+						 &checkpoint_flush_after);
 
 	TRACE_POSTGRESQL_BUFFER_SYNC_START(NBuffers, num_to_scan);
 
@@ -5433,10 +5440,13 @@ ts_ckpt_progress_comparator(Datum a, Datum b, void *arg)
  * writeback control will be performed.
  */
 void
-WritebackContextInit(WritebackContext *context, int *max_pending)
+WritebackContextInit(WritebackContext *context, IOObject io_object,
+					 IOContext io_context, int *max_pending)
 {
 	Assert(*max_pending <= WRITEBACK_MAX_PENDING_FLUSHES);
 
+	context->io_object = io_object;
+	context->io_context = io_context;
 	context->max_pending = max_pending;
 	context->nr_pending = 0;
 }
@@ -5491,6 +5501,7 @@ ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag)
 void
 IssuePendingWritebacks(WritebackContext *context)
 {
+	instr_time	io_start;
 	int			i;
 
 	if (context->nr_pending == 0)
@@ -5502,6 +5513,8 @@ IssuePendingWritebacks(WritebackContext *context)
 	 */
 	sort_pending_writebacks(context->pending_writebacks, context->nr_pending);
 
+	io_start = pgstat_prepare_io_time();
+
 	/*
 	 * Coalesce neighbouring writes, but nothing else. For that we iterate
 	 * through the, now sorted, array of pending flushes, and look forward to
@@ -5555,6 +5568,9 @@ IssuePendingWritebacks(WritebackContext *context)
 		smgrwriteback(reln, BufTagGetForkNum(&tag), tag.blockNum, nblocks);
 	}
 
+	pgstat_count_io_op_time(context->io_object, context->io_context,
+							IOOP_WRITEBACK, io_start, context->nr_pending);
+
 	context->nr_pending = 0;
 }
 
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 0d57b6a7c0..70da0a2de1 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1268,6 +1268,8 @@ typedef enum io_stat_col
 	IO_COL_READ_TIME,
 	IO_COL_WRITES,
 	IO_COL_WRITE_TIME,
+	IO_COL_WRITEBACKS,
+	IO_COL_WRITEBACK_TIME,
 	IO_COL_EXTENDS,
 	IO_COL_EXTEND_TIME,
 	IO_COL_CONVERSION,
@@ -1303,6 +1305,8 @@ pgstat_get_io_op_index(IOOp io_op)
 			return IO_COL_REUSES;
 		case IOOP_WRITE:
 			return IO_COL_WRITES;
+		case IOOP_WRITEBACK:
+			return IO_COL_WRITEBACKS;
 	}
 
 	elog(ERROR, "unrecognized IOOp value: %d", io_op);
@@ -1322,6 +1326,7 @@ pgstat_get_io_time_index(IOOp io_op)
 	{
 		case IOOP_READ:
 		case IOOP_WRITE:
+		case IOOP_WRITEBACK:
 		case IOOP_EXTEND:
 		case IOOP_FSYNC:
 			return pgstat_get_io_op_index(io_op) + 1;
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index b2bc81b15f..e766b9b600 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5760,9 +5760,9 @@
   proname => 'pg_stat_get_io', provolatile => 'v',
   prorows => '30', proretset => 't',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
+  proallargtypes => '{text,text,text,int8,float8,int8,float8,int8,float8,int8,float8,int8,int8,int8,int8,int8,float8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,object,context,reads,read_time,writes,write_time,writebacks,writeback_time,extends,extend_time,op_bytes,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}',
   prosrc => 'pg_stat_get_io' },
 
 { oid => '1136', descr => 'statistics: information about WAL activity',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fff4ad5b6d..57a2c0866a 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -300,9 +300,10 @@ typedef enum IOOp
 	IOOP_READ,
 	IOOP_REUSE,
 	IOOP_WRITE,
+	IOOP_WRITEBACK,
 } IOOp;
 
-#define IOOP_NUM_TYPES (IOOP_WRITE + 1)
+#define IOOP_NUM_TYPES (IOOP_WRITEBACK + 1)
 
 typedef struct PgStat_BktypeIO
 {
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 136cf8fbaf..0921710605 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -295,6 +295,9 @@ typedef struct PendingWriteback
 /* struct forward declared in bufmgr.h */
 typedef struct WritebackContext
 {
+	IOObject	io_object;
+	IOContext	io_context;
+
 	/* pointer to the max number of writeback requests to coalesce */
 	int		   *max_pending;
 
@@ -387,7 +390,8 @@ extern PGDLLIMPORT CkptSortItem *CkptBufferIds;
  * Internal buffer management routines
  */
 /* bufmgr.c */
-extern void WritebackContextInit(WritebackContext *context, int *max_pending);
+extern void WritebackContextInit(WritebackContext *context, IOObject io_object,
+								 IOContext io_context, int *max_pending);
 extern void IssuePendingWritebacks(WritebackContext *context);
 extern void ScheduleBufferTagForWriteback(WritebackContext *context, BufferTag *tag);
 
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 69957687fe..0bd6db2bf8 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1887,6 +1887,8 @@ pg_stat_io| SELECT backend_type,
     read_time,
     writes,
     write_time,
+    writebacks,
+    writeback_time,
     extends,
     extend_time,
     op_bytes,
@@ -1896,7 +1898,7 @@ pg_stat_io| SELECT backend_type,
     fsyncs,
     fsync_time,
     stats_reset
-   FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
+   FROM pg_stat_get_io() b(backend_type, object, context, reads, read_time, writes, write_time, writebacks, writeback_time, extends, extend_time, op_bytes, hits, evictions, reuses, fsyncs, fsync_time, stats_reset);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
     d.datname,
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 813d6d39ea..26ff27911a 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -1384,7 +1384,7 @@ SELECT pg_stat_have_stats('io', 0, 0);
  t
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
  pg_stat_reset_shared 
@@ -1392,7 +1392,7 @@ SELECT pg_stat_reset_shared('io');
  
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
  ?column? 
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index 99a28bb79c..257e91d10d 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -694,10 +694,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
 
 -- Test IO stats reset
 SELECT pg_stat_have_stats('io', 0, 0);
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(writebacks) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
 
-- 
2.37.2

Reply via email to