Hi!
On 13.09.2024 18:20, Fujii Masao wrote:
If I understand correctly, restartpoints_timed and restartpoints_done were
separated because a restartpoint can be skipped. restartpoints_timed counts
when a restartpoint is triggered by a timeout, whether it runs or not,
while restartpoints_done only tracks completed restartpoints.
Similarly, I believe checkpoints should be handled the same way.
Checkpoints can also be skipped when the system is idle, but currently,
num_timed counts even the skipped ones, despite its documentation stating
it's the "Number of scheduled checkpoints that have been performed."
Why not separate num_timed into something like checkpoints_timed and
checkpoints_done to reflect these different counters?
+1
This idea seems quite tenable to me.
There is a small clarification. Now if there were no skipped restartpoints then
restartpoints_done will be equal to restartpoints_timed + restartpoints_req.
Similar for checkpoints.
So i tried to introduce num_done counter for checkpoints in the patch attached.
I'm not sure should we include testing for the case when num_done is less than
num_timed + num_requested to the regress tests. I haven't been able to get it
in a short time yet.
E.g. such a case may be obtained when an a error "checkpoints are
occurring too frequently" as follows:
-set checkpoint_timeout = 30 and checkpoint_warning = 40 in the postgresql.conf
-start server
-do periodically bulk insertions in the 1st client (e.g. insert into test
values (generate_series(1,1E7));)
-watch for pg_stat_checkpointer in the 2nd one:
# SELECT CURRENT_TIME; select * from pg_stat_checkpointer;
# \watch
After some time, in the log will appear:
2024-09-16 16:38:47.888 MSK [193733] LOG: checkpoints are occurring too
frequently (13 seconds apart)
2024-09-16 16:38:47.888 MSK [193733] HINT: Consider increasing the configuration
parameter "max_wal_size".
And num_timed + num_requested will become greater than num_done.
Would be nice to find some simpler and faster way.
With the best regards,
--
Anton A. Melnikov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
From fcd0b61d1f1718dbf664cb3509aad16543d65375 Mon Sep 17 00:00:00 2001
From: "Anton A. Melnikov" <a.melni...@postgrespro.ru>
Date: Mon, 16 Sep 2024 16:12:07 +0300
Subject: [PATCH] Introduce num_done counter in the pg_stat_checkpointer view
that reflects number of really performed checkpoints.
---
doc/src/sgml/monitoring.sgml | 13 +++++-
src/backend/catalog/system_views.sql | 1 +
src/backend/postmaster/checkpointer.c | 2 +
.../utils/activity/pgstat_checkpointer.c | 2 +
src/backend/utils/adt/pgstatfuncs.c | 6 +++
src/include/catalog/pg_proc.dat | 40 +++++++++++--------
src/include/pgstat.h | 1 +
src/test/regress/expected/rules.out | 1 +
8 files changed, 47 insertions(+), 19 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 933de6fe07f..dad7e236a43 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3051,7 +3051,7 @@ description | Waiting for a newly initialized WAL file to reach durable storage
<structfield>num_timed</structfield> <type>bigint</type>
</para>
<para>
- Number of scheduled checkpoints that have been performed
+ Number of scheduled checkpoints
</para></entry>
</row>
@@ -3060,7 +3060,16 @@ description | Waiting for a newly initialized WAL file to reach durable storage
<structfield>num_requested</structfield> <type>bigint</type>
</para>
<para>
- Number of requested checkpoints that have been performed
+ Number of backend requested checkpoints
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>num_done</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of checkpoints that have been performed
</para></entry>
</row>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7fd5d256a18..49109dbdc86 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1138,6 +1138,7 @@ CREATE VIEW pg_stat_checkpointer AS
SELECT
pg_stat_get_checkpointer_num_timed() AS num_timed,
pg_stat_get_checkpointer_num_requested() AS num_requested,
+ pg_stat_get_checkpointer_num_performed() AS num_done,
pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed,
pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req,
pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done,
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index eeb73c85726..06ad2f52f27 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -495,6 +495,8 @@ CheckpointerMain(char *startup_data, size_t startup_data_len)
if (do_restartpoint)
PendingCheckpointerStats.restartpoints_performed++;
+ else
+ PendingCheckpointerStats.num_performed++;
}
else
{
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c
index bbfc9c7e183..4a0a2d1493a 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -49,6 +49,7 @@ pgstat_report_checkpointer(void)
#define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld += PendingCheckpointerStats.fld
CHECKPOINTER_ACC(num_timed);
CHECKPOINTER_ACC(num_requested);
+ CHECKPOINTER_ACC(num_performed);
CHECKPOINTER_ACC(restartpoints_timed);
CHECKPOINTER_ACC(restartpoints_requested);
CHECKPOINTER_ACC(restartpoints_performed);
@@ -127,6 +128,7 @@ pgstat_checkpointer_snapshot_cb(void)
#define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -= reset.fld;
CHECKPOINTER_COMP(num_timed);
CHECKPOINTER_COMP(num_requested);
+ CHECKPOINTER_COMP(num_performed);
CHECKPOINTER_COMP(restartpoints_timed);
CHECKPOINTER_COMP(restartpoints_requested);
CHECKPOINTER_COMP(restartpoints_performed);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 33c7b25560b..5624bef18e7 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1191,6 +1191,12 @@ pg_stat_get_checkpointer_num_requested(PG_FUNCTION_ARGS)
PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_requested);
}
+Datum
+pg_stat_get_checkpointer_num_performed(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_performed);
+}
+
Datum
pg_stat_get_checkpointer_restartpoints_timed(PG_FUNCTION_ARGS)
{
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 53a081ed886..c8c1b79ff63 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5807,42 +5807,58 @@
proargmodes => '{o,o,o,o,o,o,o}',
proargnames => '{archived_count,last_archived_wal,last_archived_time,failed_count,last_failed_wal,last_failed_time,stats_reset}',
prosrc => 'pg_stat_get_archiver' },
-{ oid => '2769',
+{ oid => '6347',
descr => 'statistics: number of timed checkpoints started by the checkpointer',
proname => 'pg_stat_get_checkpointer_num_timed', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_num_timed' },
-{ oid => '2770',
+{ oid => '6348',
descr => 'statistics: number of backend requested checkpoints started by the checkpointer',
proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_num_requested' },
-{ oid => '6327',
+{ oid => '6349',
+ descr => 'statistics: number of checkpoints performed by the checkpointer',
+ proname => 'pg_stat_get_checkpointer_num_performed',
+ provolatile => 's', proparallel => 'r', prorettype => 'int8',
+ proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_num_performed' },
+{ oid => '6350',
descr => 'statistics: number of timed restartpoints started by the checkpointer',
proname => 'pg_stat_get_checkpointer_restartpoints_timed', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_restartpoints_timed' },
-{ oid => '6328',
+{ oid => '6351',
descr => 'statistics: number of backend requested restartpoints started by the checkpointer',
proname => 'pg_stat_get_checkpointer_restartpoints_requested',
provolatile => 's', proparallel => 'r', prorettype => 'int8',
proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_restartpoints_requested' },
-{ oid => '6329',
+{ oid => '6352',
descr => 'statistics: number of backend performed restartpoints',
proname => 'pg_stat_get_checkpointer_restartpoints_performed',
provolatile => 's', proparallel => 'r', prorettype => 'int8',
proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_restartpoints_performed' },
-{ oid => '2771',
+{ oid => '6353',
descr => 'statistics: number of buffers written during checkpoints and restartpoints',
proname => 'pg_stat_get_checkpointer_buffers_written', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_buffers_written' },
-{ oid => '6314', descr => 'statistics: last reset for the checkpointer',
+{ oid => '6354', descr => 'statistics: last reset for the checkpointer',
proname => 'pg_stat_get_checkpointer_stat_reset_time', provolatile => 's',
proparallel => 'r', prorettype => 'timestamptz', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_stat_reset_time' },
+{ oid => '6355',
+ descr => 'statistics: checkpoint/restartpoint time spent writing buffers to disk, in milliseconds',
+ proname => 'pg_stat_get_checkpointer_write_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_write_time' },
+{ oid => '6356',
+ descr => 'statistics: checkpoint/restartpoint time spent synchronizing buffers to disk, in milliseconds',
+ proname => 'pg_stat_get_checkpointer_sync_time', provolatile => 's',
+ proparallel => 'r', prorettype => 'float8', proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_sync_time' },
{ oid => '2772',
descr => 'statistics: number of buffers written by the bgwriter for cleaning dirty buffers',
proname => 'pg_stat_get_bgwriter_buf_written_clean', provolatile => 's',
@@ -5857,16 +5873,6 @@
proname => 'pg_stat_get_bgwriter_stat_reset_time', provolatile => 's',
proparallel => 'r', prorettype => 'timestamptz', proargtypes => '',
prosrc => 'pg_stat_get_bgwriter_stat_reset_time' },
-{ oid => '3160',
- descr => 'statistics: checkpoint/restartpoint time spent writing buffers to disk, in milliseconds',
- proname => 'pg_stat_get_checkpointer_write_time', provolatile => 's',
- proparallel => 'r', prorettype => 'float8', proargtypes => '',
- prosrc => 'pg_stat_get_checkpointer_write_time' },
-{ oid => '3161',
- descr => 'statistics: checkpoint/restartpoint time spent synchronizing buffers to disk, in milliseconds',
- proname => 'pg_stat_get_checkpointer_sync_time', provolatile => 's',
- proparallel => 'r', prorettype => 'float8', proargtypes => '',
- prosrc => 'pg_stat_get_checkpointer_sync_time' },
{ oid => '2859', descr => 'statistics: number of buffer allocations',
proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index be2c91168a1..39850d8f14f 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -294,6 +294,7 @@ typedef struct PgStat_CheckpointerStats
{
PgStat_Counter num_timed;
PgStat_Counter num_requested;
+ PgStat_Counter num_performed;
PgStat_Counter restartpoints_timed;
PgStat_Counter restartpoints_requested;
PgStat_Counter restartpoints_performed;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a1626f3fae9..f5434d8365c 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1824,6 +1824,7 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_buf_written_clean() AS buffers_cle
pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
pg_stat_checkpointer| SELECT pg_stat_get_checkpointer_num_timed() AS num_timed,
pg_stat_get_checkpointer_num_requested() AS num_requested,
+ pg_stat_get_checkpointer_num_performed() AS num_done,
pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed,
pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req,
pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done,
--
2.46.0