On 2024/09/19 19:16, Anton A. Melnikov wrote:
On 18.09.2024 21:04, Fujii Masao wrote:
- CreateCheckPoint(flags);
- ckpt_performed = true;
+ ckpt_performed = CreateCheckPoint(flags);
This change could result in the next scheduled checkpoint being
triggered in 15 seconds if a checkpoint is skipped, which isn’t
the intended behavior.
Thanks for pointing this out! This is really bug.
Rearranged the logic a bit to save the previous behavior
in the v3 attached.
Thanks for updating the patch!
I've attached the updated version (0001.patch). I made some cosmetic changes,
including reverting the switch in the entries for
pg_stat_get_checkpointer_write_time
and pg_stat_get_checkpointer_sync_time in pg_proc.dat, as I didn’t think
that change was necessary. Could you please review the latest version?
After we commit 0001.patch, how about applying 0002.patch, which updates
the documentation for the pg_stat_checkpointer view to clarify what types
of checkpoints and restartpoints each counter tracks?
In 0002.patch, I also modified the description of num_requested from
"Number of backend requested checkpoints" to remove "backend," as it can
be confusing since num_requested includes requests from sources other than
the backend. Thought?
Regards,
--
Fujii Masao
Advanced Computing Technology Center
Research and Development Headquarters
NTT DATA CORPORATION
From 859f3fecb4fb4900b6ce12f6346c5d9565fbc072 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fu...@postgresql.org>
Date: Fri, 20 Sep 2024 11:33:07 +0900
Subject: [PATCH v4 1/2] Add num_done counter to the pg_stat_checkpointer view.
Checkpoints can be skipped when the server is idle. The existing num_timed and
num_requested counters in pg_stat_checkpointer track both completed and
skipped checkpoints, but there was no way to count only the completed ones.
This commit introduces the num_done counter, which tracks only completed
checkpoints, making it easier to see how many were actually performed.
Bump catalog version.
Author: Anton A. Melnikov
Reviewed-by: Fujii Masao
Discussion:
https://postgr.es/m/9ea77f40-818d-4841-9dee-158ac8f6e...@oss.nttdata.com
---
doc/src/sgml/monitoring.sgml | 11 +++++-
src/backend/access/transam/xlog.c | 9 ++++-
src/backend/catalog/system_views.sql | 1 +
src/backend/postmaster/checkpointer.c | 39 ++++++++++++-------
.../utils/activity/pgstat_checkpointer.c | 2 +
src/backend/utils/adt/pgstatfuncs.c | 6 +++
src/include/access/xlog.h | 2 +-
src/include/catalog/pg_proc.dat | 6 +++
src/include/pgstat.h | 1 +
src/test/regress/expected/rules.out | 1 +
10 files changed, 60 insertions(+), 18 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index a2fda4677d..19bf0164f1 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3063,7 +3063,16 @@ description | Waiting for a newly initialized WAL file
to reach durable storage
<structfield>num_requested</structfield> <type>bigint</type>
</para>
<para>
- Number of requested checkpoints that have been performed
+ Number of backend requested checkpoints
+ </para></entry>
+ </row>
+
+ <row>
+ <entry role="catalog_table_entry"><para role="column_definition">
+ <structfield>num_done</structfield> <type>bigint</type>
+ </para>
+ <para>
+ Number of checkpoints that have been performed
</para></entry>
</row>
diff --git a/src/backend/access/transam/xlog.c
b/src/backend/access/transam/xlog.c
index 853ab06812..64304d77d3 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6878,8 +6878,11 @@ update_checkpoint_display(int flags, bool restartpoint,
bool reset)
* In this case, we only insert an XLOG_CHECKPOINT_SHUTDOWN record, and it's
* both the record marking the completion of the checkpoint and the location
* from which WAL replay would begin if needed.
+ *
+ * Returns true if a new checkpoint was performed, or false if it was skipped
+ * because the system was idle.
*/
-void
+bool
CreateCheckPoint(int flags)
{
bool shutdown;
@@ -6971,7 +6974,7 @@ CreateCheckPoint(int flags)
END_CRIT_SECTION();
ereport(DEBUG1,
(errmsg_internal("checkpoint skipped
because system is idle")));
- return;
+ return false;
}
}
@@ -7353,6 +7356,8 @@ CreateCheckPoint(int flags)
CheckpointStats.ckpt_segs_added,
CheckpointStats.ckpt_segs_removed,
CheckpointStats.ckpt_segs_recycled);
+
+ return true;
}
/*
diff --git a/src/backend/catalog/system_views.sql
b/src/backend/catalog/system_views.sql
index 7fd5d256a1..49109dbdc8 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1138,6 +1138,7 @@ CREATE VIEW pg_stat_checkpointer AS
SELECT
pg_stat_get_checkpointer_num_timed() AS num_timed,
pg_stat_get_checkpointer_num_requested() AS num_requested,
+ pg_stat_get_checkpointer_num_performed() AS num_done,
pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed,
pg_stat_get_checkpointer_restartpoints_requested() AS
restartpoints_req,
pg_stat_get_checkpointer_restartpoints_performed() AS
restartpoints_done,
diff --git a/src/backend/postmaster/checkpointer.c
b/src/backend/postmaster/checkpointer.c
index eeb73c8572..9087e3f8db 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -460,10 +460,7 @@ CheckpointerMain(char *startup_data, size_t
startup_data_len)
* Do the checkpoint.
*/
if (!do_restartpoint)
- {
- CreateCheckPoint(flags);
- ckpt_performed = true;
- }
+ ckpt_performed = CreateCheckPoint(flags);
else
ckpt_performed = CreateRestartPoint(flags);
@@ -484,7 +481,7 @@ CheckpointerMain(char *startup_data, size_t
startup_data_len)
ConditionVariableBroadcast(&CheckpointerShmem->done_cv);
- if (ckpt_performed)
+ if (!do_restartpoint)
{
/*
* Note we record the checkpoint start time not
end time as
@@ -493,18 +490,32 @@ CheckpointerMain(char *startup_data, size_t
startup_data_len)
*/
last_checkpoint_time = now;
- if (do_restartpoint)
-
PendingCheckpointerStats.restartpoints_performed++;
+ if (ckpt_performed)
+
PendingCheckpointerStats.num_performed++;
}
else
{
- /*
- * We were not able to perform the restartpoint
(checkpoints
- * throw an ERROR in case of error). Most
likely because we
- * have not received any new checkpoint WAL
records since the
- * last restartpoint. Try again in 15 s.
- */
- last_checkpoint_time = now - CheckPointTimeout
+ 15;
+ if (ckpt_performed)
+ {
+ /*
+ * The same as for checkpoint. Please
see the
+ * corresponding comment.
+ */
+ last_checkpoint_time = now;
+
+
PendingCheckpointerStats.restartpoints_performed++;
+ }
+ else
+ {
+ /*
+ * We were not able to perform the
restartpoint
+ * (checkpoints throw an ERROR in case
of error). Most
+ * likely because we have not received
any new checkpoint
+ * WAL records since the last
restartpoint. Try again in
+ * 15 s.
+ */
+ last_checkpoint_time = now -
CheckPointTimeout + 15;
+ }
}
ckpt_active = false;
diff --git a/src/backend/utils/activity/pgstat_checkpointer.c
b/src/backend/utils/activity/pgstat_checkpointer.c
index bbfc9c7e18..4a0a2d1493 100644
--- a/src/backend/utils/activity/pgstat_checkpointer.c
+++ b/src/backend/utils/activity/pgstat_checkpointer.c
@@ -49,6 +49,7 @@ pgstat_report_checkpointer(void)
#define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld +=
PendingCheckpointerStats.fld
CHECKPOINTER_ACC(num_timed);
CHECKPOINTER_ACC(num_requested);
+ CHECKPOINTER_ACC(num_performed);
CHECKPOINTER_ACC(restartpoints_timed);
CHECKPOINTER_ACC(restartpoints_requested);
CHECKPOINTER_ACC(restartpoints_performed);
@@ -127,6 +128,7 @@ pgstat_checkpointer_snapshot_cb(void)
#define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -=
reset.fld;
CHECKPOINTER_COMP(num_timed);
CHECKPOINTER_COMP(num_requested);
+ CHECKPOINTER_COMP(num_performed);
CHECKPOINTER_COMP(restartpoints_timed);
CHECKPOINTER_COMP(restartpoints_requested);
CHECKPOINTER_COMP(restartpoints_performed);
diff --git a/src/backend/utils/adt/pgstatfuncs.c
b/src/backend/utils/adt/pgstatfuncs.c
index 9c23ac7c8c..17b0fc02ef 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1191,6 +1191,12 @@ pg_stat_get_checkpointer_num_requested(PG_FUNCTION_ARGS)
PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_requested);
}
+Datum
+pg_stat_get_checkpointer_num_performed(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->num_performed);
+}
+
Datum
pg_stat_get_checkpointer_restartpoints_timed(PG_FUNCTION_ARGS)
{
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 083810f5b4..36f6e4e4b4 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -239,7 +239,7 @@ extern void LocalProcessControlFile(bool reset);
extern WalLevel GetActiveWalLevelOnStandby(void);
extern void StartupXLOG(void);
extern void ShutdownXLOG(int code, Datum arg);
-extern void CreateCheckPoint(int flags);
+extern bool CreateCheckPoint(int flags);
extern bool CreateRestartPoint(int flags);
extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN);
extern void XLogPutNextOid(Oid nextOid);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 43f608d7a0..52fca54f3a 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5817,6 +5817,12 @@
proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => '',
prosrc => 'pg_stat_get_checkpointer_num_requested' },
+{ oid => '2775',
+ descr => 'statistics: number of checkpoints performed by the checkpointer',
+ proname => 'pg_stat_get_checkpointer_num_performed',
+ provolatile => 's', proparallel => 'r', prorettype => 'int8',
+ proargtypes => '',
+ prosrc => 'pg_stat_get_checkpointer_num_performed' },
{ oid => '6327',
descr => 'statistics: number of timed restartpoints started by the
checkpointer',
proname => 'pg_stat_get_checkpointer_restartpoints_timed', provolatile =>
's',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 4752dfe719..476acd680c 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -294,6 +294,7 @@ typedef struct PgStat_CheckpointerStats
{
PgStat_Counter num_timed;
PgStat_Counter num_requested;
+ PgStat_Counter num_performed;
PgStat_Counter restartpoints_timed;
PgStat_Counter restartpoints_requested;
PgStat_Counter restartpoints_performed;
diff --git a/src/test/regress/expected/rules.out
b/src/test/regress/expected/rules.out
index a1626f3fae..f5434d8365 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1824,6 +1824,7 @@ pg_stat_bgwriter| SELECT
pg_stat_get_bgwriter_buf_written_clean() AS buffers_cle
pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
pg_stat_checkpointer| SELECT pg_stat_get_checkpointer_num_timed() AS num_timed,
pg_stat_get_checkpointer_num_requested() AS num_requested,
+ pg_stat_get_checkpointer_num_performed() AS num_done,
pg_stat_get_checkpointer_restartpoints_timed() AS restartpoints_timed,
pg_stat_get_checkpointer_restartpoints_requested() AS restartpoints_req,
pg_stat_get_checkpointer_restartpoints_performed() AS restartpoints_done,
--
2.45.2
From d9311aee5a0e7665eb0ea32928f728a8cd01fab5 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fu...@postgresql.org>
Date: Sat, 21 Sep 2024 00:47:12 +0900
Subject: [PATCH v4 2/2] docs: Enhance the pg_stat_checkpointer view
documentation.
This commit updates the documentation for the pg_stat_checkpointer view
to clarify what kind of checkpoints or restartpoints each counter tracks.
This makes it easier to understand the meaning of each counter.
---
doc/src/sgml/monitoring.sgml | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 19bf0164f1..3484a0490a 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3051,10 +3051,7 @@ description | Waiting for a newly initialized WAL file
to reach durable storage
<structfield>num_timed</structfield> <type>bigint</type>
</para>
<para>
- Number of scheduled checkpoints due to timeout.
- Note that checkpoints may be skipped if the server has been idle
- since the last one, and this value counts both completed and
- skipped checkpoints
+ Number of scheduled checkpoints due to timeout
</para></entry>
</row>
@@ -3063,7 +3060,7 @@ description | Waiting for a newly initialized WAL file to
reach durable storage
<structfield>num_requested</structfield> <type>bigint</type>
</para>
<para>
- Number of backend requested checkpoints
+ Number of requested checkpoints
</para></entry>
</row>
@@ -3146,6 +3143,18 @@ description | Waiting for a newly initialized WAL file
to reach durable storage
</tgroup>
</table>
+ <para>
+ Checkpoints may be skipped if the server has been idle since the last one.
+ <structfield>num_timed</structfield> and
+ <structfield>num_requested</structfield> count both completed and skipped
+ checkpoints, while <structfield>num_done</structfield> tracks only
+ the completed ones. Similarly, restartpoints may be skipped
+ if the last replayed checkpoint record is already the last restartpoint.
+ <structfield>restartpoints_timed</structfield> and
+ <structfield>restartpoints_req</structfield> count both completed and
+ skipped restartpoints, while <structfield>restartpoints_done</structfield>
+ tracks only the completed ones.
+ </para>
</sect2>
<sect2 id="monitoring-pg-stat-wal-view">
--
2.45.2