Hi!

On 13.09.2024 18:20, Fujii Masao wrote:

If I understand correctly, restartpoints_timed and restartpoints_done were
separated because a restartpoint can be skipped. restartpoints_timed counts
when a restartpoint is triggered by a timeout, whether it runs or not,
while restartpoints_done only tracks completed restartpoints.

Similarly, I believe checkpoints should be handled the same way.
Checkpoints can also be skipped when the system is idle, but currently,
num_timed counts even the skipped ones, despite its documentation stating
it's the "Number of scheduled checkpoints that have been performed."

Why not separate num_timed into something like checkpoints_timed and
checkpoints_done to reflect these different counters?

+1
This idea seems quite tenable to me.

There is a small clarification. Now if there were no skipped restartpoints then
restartpoints_done will be equal to restartpoints_timed + restartpoints_req.
Similar for checkpoints.
So i tried to introduce num_done counter for checkpoints in the patch attached.

I'm not sure should we include testing for the case when num_done is less than
num_timed + num_requested to the regress tests. I haven't been able to get it 
in a short time yet.

E.g. such a case may be obtained when an a error "checkpoints are
occurring too frequently" as follows:
-set checkpoint_timeout = 30 and checkpoint_warning = 40 in the postgresql.conf
-start server
-do periodically bulk insertions in the 1st client (e.g. insert into test 
values (generate_series(1,1E7));)
-watch for pg_stat_checkpointer in the 2nd one:
# SELECT CURRENT_TIME; select * from pg_stat_checkpointer;
# \watch

After some time, in the log will appear:
2024-09-16 16:38:47.888 MSK [193733] LOG:  checkpoints are occurring too 
frequently (13 seconds apart)
2024-09-16 16:38:47.888 MSK [193733] HINT:  Consider increasing the configuration 
parameter "max_wal_size".

And num_timed + num_requested will become greater than num_done.

Would be nice to find some simpler and faster way.


With the best regards,

--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
From fcd0b61d1f1718dbf664cb3509aad16543d65375 Mon Sep 17 00:00:00 2001
From: "Anton A. Melnikov" <a.melni...@postgrespro.ru>
Date: Mon, 16 Sep 2024 16:12:07 +0300
Subject: [PATCH] Introduce num_done counter in the pg_stat_checkpointer view
 that reflects number of really performed checkpoints.

---
 doc/src/sgml/monitoring.sgml                  | 13 +++++-
 src/backend/catalog/system_views.sql          |  1 +
 src/backend/postmaster/checkpointer.c         |  2 +
 .../utils/activity/pgstat_checkpointer.c      |  2 +
 src/backend/utils/adt/pgstatfuncs.c           |  6 +++
 src/include/catalog/pg_proc.dat               | 40 +++++++++++--------
 src/include/pgstat.h                          |  1 +
 src/test/regress/expected/rules.out           |  1 +
 8 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 933de6fe07f..dad7e236a43 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3051,7 +3051,7 @@ description | Waiting for a newly initialized WAL file to reach durable storage
        <structfield>num_timed</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of scheduled checkpoints that have been performed
+       Number of scheduled checkpoints
       </para></entry>
      </row>
 
@@ -3060,7 +3060,16 @@ description | Waiting for a newly initialized WAL file to reach durable storage
        <structfield>num_requested</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of requested checkpoints that have been performed
+       Number of backend requested checkpoints
+      </para></entry>
+     </row>
+
+      <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>num_done</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of checkpoints that have been performed
       </para></entry>
      </row>
 
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7fd5d256a18..49109dbdc86 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1138,6 +1138,7 @@ CREATE VIEW pg_stat_checkpointer AS
     SELECT
         pg_stat_get_checkpointer_num_timed() AS num_timed,
         pg_stat_get_checkpointer_num_requested() AS num_requested,
+        pg_stat_get_checkpointer_num_performed() AS num_done,
         pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed,
         pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req,
         pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done,
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index eeb73c85726..06ad2f52f27 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -495,6 +495,8 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
 
 				if (do_restartpoint)
 					PendingCheckpointerStats.restartpoints_performed++;
+				else
+					PendingCheckpointerStats.num_performed++;
 			}
 			else
 			{
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c
index bbfc9c7e183..4a0a2d1493a 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -49,6 +49,7 @@ pgstat_report_checkpointer(void)
 #define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld += PendingCheckpointerStats.fld
 	CHECKPOINTER_ACC(num_timed);
 	CHECKPOINTER_ACC(num_requested);
+	CHECKPOINTER_ACC(num_performed);
 	CHECKPOINTER_ACC(restartpoints_timed);
 	CHECKPOINTER_ACC(restartpoints_requested);
 	CHECKPOINTER_ACC(restartpoints_performed);
@@ -127,6 +128,7 @@ pgstat_checkpointer_snapshot_cb(void)
 #define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -= reset.fld;
 	CHECKPOINTER_COMP(num_timed);
 	CHECKPOINTER_COMP(num_requested);
+	CHECKPOINTER_COMP(num_performed);
 	CHECKPOINTER_COMP(restartpoints_timed);
 	CHECKPOINTER_COMP(restartpoints_requested);
 	CHECKPOINTER_COMP(restartpoints_performed);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 33c7b25560b..5624bef18e7 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1191,6 +1191,12 @@ pg_stat_get_checkpointer_num_requested(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_requested);
 }
 
+Datum
+pg_stat_get_checkpointer_num_performed(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_performed);
+}
+
 Datum
 pg_stat_get_checkpointer_restartpoints_timed(PG_FUNCTION_ARGS)
 {
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 53a081ed886..c8c1b79ff63 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5807,42 +5807,58 @@
   proargmodes => '{o,o,o,o,o,o,o}',
   proargnames => '{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,stats_reset}',
   prosrc => 'pg_stat_get_archiver' },
-{ oid => '2769',
+{ oid => '6347',
   descr => 'statistics: number of timed checkpoints started by the checkpointer',
   proname => 'pg_stat_get_checkpointer_num_timed', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_num_timed' },
-{ oid => '2770',
+{ oid => '6348',
   descr => 'statistics: number of backend requested checkpoints started by the checkpointer',
   proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_num_requested' },
-{ oid => '6327',
+{ oid => '6349',
+  descr => 'statistics: number of checkpoints performed by the checkpointer',
+  proname => 'pg_stat_get_checkpointer_num_performed',
+  provolatile => 's', proparallel => 'r', prorettype => 'int8',
+  proargtypes => '',
+  prosrc => 'pg_stat_get_checkpointer_num_performed' },
+{ oid => '6350',
   descr => 'statistics: number of timed restartpoints started by the checkpointer',
   proname => 'pg_stat_get_checkpointer_restartpoints_timed', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_restartpoints_timed' },
-{ oid => '6328',
+{ oid => '6351',
   descr => 'statistics: number of backend requested restartpoints started by the checkpointer',
   proname => 'pg_stat_get_checkpointer_restartpoints_requested',
   provolatile => 's', proparallel => 'r', prorettype => 'int8',
   proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_restartpoints_requested' },
-{ oid => '6329',
+{ oid => '6352',
   descr => 'statistics: number of backend performed restartpoints',
   proname => 'pg_stat_get_checkpointer_restartpoints_performed',
   provolatile => 's', proparallel => 'r', prorettype => 'int8',
   proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_restartpoints_performed' },
-{ oid => '2771',
+{ oid => '6353',
   descr => 'statistics: number of buffers written during checkpoints and restartpoints',
   proname => 'pg_stat_get_checkpointer_buffers_written', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_buffers_written' },
-{ oid => '6314', descr => 'statistics: last reset for the checkpointer',
+{ oid => '6354', descr => 'statistics: last reset for the checkpointer',
   proname => 'pg_stat_get_checkpointer_stat_reset_time', provolatile => 's',
   proparallel => 'r', prorettype => 'timestamptz', proargtypes => '',
   prosrc => 'pg_stat_get_checkpointer_stat_reset_time' },
+{ oid => '6355',
+  descr => 'statistics: checkpoint/restartpoint time spent writing buffers to disk, in milliseconds',
+  proname => 'pg_stat_get_checkpointer_write_time', provolatile => 's',
+  proparallel => 'r', prorettype => 'float8', proargtypes => '',
+  prosrc => 'pg_stat_get_checkpointer_write_time' },
+{ oid => '6356',
+  descr => 'statistics: checkpoint/restartpoint time spent synchronizing buffers to disk, in milliseconds',
+  proname => 'pg_stat_get_checkpointer_sync_time', provolatile => 's',
+  proparallel => 'r', prorettype => 'float8', proargtypes => '',
+  prosrc => 'pg_stat_get_checkpointer_sync_time' },
 { oid => '2772',
   descr => 'statistics: number of buffers written by the bgwriter for cleaning dirty buffers',
   proname => 'pg_stat_get_bgwriter_buf_written_clean', provolatile => 's',
@@ -5857,16 +5873,6 @@
   proname => 'pg_stat_get_bgwriter_stat_reset_time', provolatile => 's',
   proparallel => 'r', prorettype => 'timestamptz', proargtypes => '',
   prosrc => 'pg_stat_get_bgwriter_stat_reset_time' },
-{ oid => '3160',
-  descr => 'statistics: checkpoint/restartpoint time spent writing buffers to disk, in milliseconds',
-  proname => 'pg_stat_get_checkpointer_write_time', provolatile => 's',
-  proparallel => 'r', prorettype => 'float8', proargtypes => '',
-  prosrc => 'pg_stat_get_checkpointer_write_time' },
-{ oid => '3161',
-  descr => 'statistics: checkpoint/restartpoint time spent synchronizing buffers to disk, in milliseconds',
-  proname => 'pg_stat_get_checkpointer_sync_time', provolatile => 's',
-  proparallel => 'r', prorettype => 'float8', proargtypes => '',
-  prosrc => 'pg_stat_get_checkpointer_sync_time' },
 { oid => '2859', descr => 'statistics: number of buffer allocations',
   proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
   prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index be2c91168a1..39850d8f14f 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -294,6 +294,7 @@ typedef struct PgStat_CheckpointerStats
 {
 	PgStat_Counter num_timed;
 	PgStat_Counter num_requested;
+	PgStat_Counter num_performed;
 	PgStat_Counter restartpoints_timed;
 	PgStat_Counter restartpoints_requested;
 	PgStat_Counter restartpoints_performed;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a1626f3fae9..f5434d8365c 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1824,6 +1824,7 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_buf_written_clean() AS buffers_cle
     pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 pg_stat_checkpointer| SELECT pg_stat_get_checkpointer_num_timed() AS num_timed,
     pg_stat_get_checkpointer_num_requested() AS num_requested,
+    pg_stat_get_checkpointer_num_performed() AS num_done,
     pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed,
     pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req,
     pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done,
-- 
2.46.0

Reply via email to